init commit

This commit is contained in:
zino
2021-09-20 01:21:50 +02:00
parent 3540980d38
commit 4eac34603f
4 changed files with 2617 additions and 0 deletions

98
scraper.js Normal file
View File

@@ -0,0 +1,98 @@
var xpath = require('xpath'),
dom = require('xmldom').DOMParser;
const {
http,
https
} = require('follow-redirects');
const fs = require('fs');
const axios = require('axios');
const glob = require('glob');
const unzipper = require('unzipper');
// Base directory that holds one sub-folder of downloaded archives per category.
const downloadRoot = `${__dirname}/dl`;

/**
 * Per-category settings, keyed by the category name used in the
 * 4players.de overview URL.
 *   location - directory the category's zip archives are written to and read from
 *   maxPage  - highest overview page index that is requested for the category
 */
const data = {
    fy_Maps: { location: `${downloadRoot}/fy`, maxPage: 21 },
    as_Maps: { location: `${downloadRoot}/as`, maxPage: 4 },
    de_Maps: { location: `${downloadRoot}/de`, maxPage: 26 },
    cs_Maps: { location: `${downloadRoot}/cs`, maxPage: 20 },
    climbing_maps: { location: `${downloadRoot}/climbing`, maxPage: 10 },
};

// Script entry point: enable the download step, the unzip step, or both.
// Category names known on the site: fy_Maps, climbing_maps, as_Maps, cs_Maps,
// de_Maps, Map%2BPacks, WAD%2BFiles, pa_Maps%252CHybrid%2BMaps, Aktuelle%2BMaps.
// Only the categories that have an entry in `data` above can be passed in.
//downloadMaps('fy_Maps');
unzipMaps('fy_Maps');
/**
 * Downloads all map archives of one category from 4players.de.
 *
 * For every overview page from 0 up to and including the category's
 * `maxPage`, the page is fetched, the "Download" button links are extracted
 * via XPath, and each linked archive is saved into the category's
 * `location` directory. All requests are started without waiting for each
 * other; failures are logged and do not stop the remaining downloads.
 *
 * @param {string} inCategory - key of `data` (e.g. 'fy_Maps'); also used
 *     verbatim as the category segment of the overview URL.
 * @throws {Error} if `inCategory` has no entry in `data`.
 */
function downloadMaps(inCategory) {
    const category = data[inCategory];
    if (!category) {
        throw new Error(`Unknown map category: ${inCategory}`);
    }
    console.log(`Category: ${inCategory} (max page: ${category.maxPage})`);

    // createWriteStream does not create missing directories, so make sure
    // the target folder exists before any download starts.
    fs.mkdirSync(category.location, { recursive: true });

    for (let page = 0; page <= category.maxPage; page++) {
        console.log(page);
        // create overview url
        const overviewUrl = `https://www.4players.de/cs.php/download_list/-/cs1.6/Maps/${inCategory}/-/-/-/${page}/index.html`;
        axios.get(overviewUrl).then((overview) => {
            // extract nodes using xpath
            const doc = new dom().parseFromString(overview.data);
            const hrefs = xpath.select("//a[contains(@class, 'button') and text() = 'Download']/@href", doc);
            hrefs.forEach((href) => {
                // the download id is the 15th "/"-separated segment of the link
                const id = href.value.split("/")[14];
                if (!id) {
                    console.error(`No download id found in link: ${href.value}`);
                    return;
                }
                downloadArchive(id, category.location);
            });
        }).catch((error) => {
            // a failed overview page must not abort the other pages
            console.log(error);
        });
    }
}

/**
 * Fetches a single archive by its download id and stores it in `targetDir`.
 *
 * The start URL redirects to the actual file; follow-redirects resolves the
 * redirect chain, so the response handed to the callback already carries
 * the archive and is written to disk directly.
 *
 * @param {string} id - download id taken from the overview page link.
 * @param {string} targetDir - existing directory the archive is written to.
 */
function downloadArchive(id, targetDir) {
    // generate download url
    const dlUrl = `https://www.4players.de/cs.php/download_start/-/download/${id}/1/index.html`;
    https.get(dlUrl, (response) => {
        if (response.statusCode !== 200) {
            console.error(`Download ${id} failed with HTTP status ${response.statusCode}`);
            response.resume(); // discard the body so the socket is released
            return;
        }

        // the zip filename is the last path segment of the final (redirected) url
        const zipName = new URL(response.responseUrl).pathname.split("/").pop();
        if (!zipName) {
            console.error(`Download ${id} did not resolve to a file: ${response.responseUrl}`);
            response.resume();
            return;
        }

        const filePath = `${targetDir}/${zipName}`;
        const file = fs.createWriteStream(filePath);
        file.on('error', (err) => {
            console.error(err);
            response.destroy(); // stop receiving data nobody can store
        });
        response.on('error', (err) => {
            console.error(err);
            file.destroy();
        });
        response.pipe(file);
    }).on('error', (err) => {
        console.error(err);
    });
}
/**
 * Extracts the game files of every downloaded zip archive of a category.
 *
 * Each `*.zip` in the category's `location` directory is streamed through
 * unzipper. File entries located below a `cstrike` folder are written to
 * `<script dir>/extract/<path below cstrike>` (lower-cased); everything
 * else, including podbot files, is skipped.
 *
 * @param {string} inCategory - key of `data` (e.g. 'fy_Maps').
 * @throws {Error} if `inCategory` has no entry in `data`.
 */
function unzipMaps(inCategory) {
    const category = data[inCategory];
    if (!category) {
        throw new Error(`Unknown map category: ${inCategory}`);
    }
    const extractRoot = `${__dirname}/extract`;

    glob(`${category.location}/*.zip`, {}, (err, files) => {
        if (err) {
            console.error(err);
            return;
        }
        console.log(files);
        for (const zipFile of files) {
            fs.createReadStream(zipFile)
                .on('error', (readErr) => console.error(`${zipFile}:`, readErr))
                .pipe(unzipper.Parse())
                .on('error', (parseErr) => console.error(`${zipFile}:`, parseErr))
                .on('entry', (entry) => {
                    // entry.type is 'Directory' or 'File'
                    const relativePath = entry.type === 'File' ? toExtractPath(entry.path) : null;
                    if (relativePath === null) {
                        // unconsumed entries must be drained, otherwise the
                        // parse stream stalls and later entries never arrive
                        entry.autodrain();
                        return;
                    }

                    const target = `${extractRoot}/${relativePath}`;
                    try {
                        fs.mkdirSync(target.slice(0, target.lastIndexOf("/")), { recursive: true });
                    } catch (mkdirErr) {
                        console.error(`${zipFile}:`, mkdirErr);
                        entry.autodrain();
                        return;
                    }
                    entry.pipe(fs.createWriteStream(target)).on('error', (writeErr) => {
                        console.error(`${zipFile}:`, writeErr);
                        // keep the archive flowing even though this file failed
                        entry.autodrain();
                    });
                });
        }
    });
}

/**
 * Maps an archive entry path to its path relative to the extract folder.
 *
 * @param {string} entryPath - entry path as stored in the zip archive.
 * @returns {string|null} lower-cased path below the first `cstrike` segment,
 *     or null when the entry must be skipped (podbot files, entries outside
 *     a `cstrike` folder, or paths that try to leave the extract folder).
 */
function toExtractPath(entryPath) {
    // archives built on Windows may use backslashes as separators
    const segments = entryPath.toLowerCase().replace(/\\/g, "/").split("/");
    if (segments.includes("podbot") || segments.includes("podbots")) {
        return null;
    }
    const cstrikeIndex = segments.indexOf("cstrike");
    if (cstrikeIndex === -1) {
        return null;
    }
    const relative = segments
        .slice(cstrikeIndex + 1)
        .filter((segment) => segment !== "" && segment !== ".");
    // archives come from the internet: never follow ".." out of the extract folder
    if (relative.length === 0 || relative.includes("..")) {
        return null;
    }
    return relative.join("/");
}