init commit

This commit is contained in:
zino
2021-09-20 01:21:50 +02:00
parent 3540980d38
commit 4eac34603f
4 changed files with 2617 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
# .gitignore
node_modules
extract
dl

2493
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

22
package.json Normal file
View File

@@ -0,0 +1,22 @@
{
"name": "mapgrab",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"devDependencies": {
"axios": "^0.21.4",
"download": "^8.0.0",
"xmldom": "^0.6.0",
"xpath": "^0.0.32"
},
"dependencies": {
"follow-redirects": "^1.14.4",
"unzipper": "^0.10.11"
}
}

98
scraper.js Normal file
View File

@@ -0,0 +1,98 @@
var xpath = require('xpath'),
dom = require('xmldom').DOMParser;
const {
http,
https
} = require('follow-redirects');
const fs = require('fs');
const axios = require('axios');
const glob = require('glob');
const unzipper = require('unzipper');
// Per-category settings, keyed by the category name used in the site's URLs:
//   location - directory the downloaded archives of that category are stored in
//   maxPage  - highest overview page index requested for that category
const data = {};
for (const [category, folder, maxPage] of [
    ['fy_Maps', 'fy', 21],
    ['as_Maps', 'as', 4],
    ['de_Maps', 'de', 26],
    ['cs_Maps', 'cs', 20],
    ['climbing_maps', 'climbing', 10],
]) {
    data[category] = { location: `${__dirname}/dl/${folder}`, maxPage };
}

// Entry point. To fetch the archives first, call downloadMaps('fy_Maps') instead.
// The argument must be a key of `data` (fy_Maps, climbing_maps, as_Maps, cs_Maps,
// de_Maps). The original note also named Map%2BPacks, WAD%2BFiles,
// pa_Maps%252CHybrid%2BMaps and Aktuelle%2BMaps, but those have no entry in
// `data` and therefore cannot be used as-is.
unzipMaps('fy_Maps');
/**
 * Download every map archive linked from the overview pages of one category.
 *
 * Requests overview pages 0 through `data[inCategory].maxPage` (inclusive),
 * collects the href of every "Download" button on each page, follows the
 * download redirect for each one and stores the resulting file in
 * `data[inCategory].location`, named after the last path segment of the
 * resolved URL. All requests are started without waiting for each other;
 * failures are logged and do not stop the remaining downloads.
 *
 * @param {string} inCategory - Key of `data`; also used verbatim in the overview URL.
 * @throws {TypeError} If `inCategory` has no entry in `data`.
 */
function downloadMaps(inCategory) {
    const category = data[inCategory];
    if (!category) {
        throw new TypeError(`Unknown category "${inCategory}"; expected one of: ${Object.keys(data).join(', ')}`);
    }
    console.log(`Category: ${inCategory} (overview pages 0-${category.maxPage})`);

    // createWriteStream does not create missing directories; without this the
    // first download on a fresh checkout fails with an unhandled ENOENT.
    fs.mkdirSync(category.location, { recursive: true });

    for (let index = 0; index <= category.maxPage; index++) {
        console.log(index);
        // create overview url
        const overviewUrl = `https://www.4players.de/cs.php/download_list/-/cs1.6/Maps/${inCategory}/-/-/-/${index}/index.html`;
        axios.get(overviewUrl).then((overview) => {
            // extract nodes using xpath
            const doc = new dom().parseFromString(overview.data);
            const nodes = xpath.select("//a[contains(@class, 'button') and text() = 'Download']/@href", doc);
            nodes.forEach((element) => {
                // generate download url
                // NOTE(review): assumes the download id is the 15th "/"-separated
                // part of the href — confirm against the live markup.
                const id = element.value.split("/")[14];
                if (!id) {
                    console.error(`No download id found in link: ${element.value}`);
                    return;
                }
                const dlUrl = `https://www.4players.de/cs.php/download_start/-/download/${id}/1/index.html`;
                https.get(dlUrl, (response) => {
                    // follow-redirects has already followed the redirect chain:
                    // `response` is the reply of the final URL, so it is written
                    // to disk directly instead of requesting the file a second
                    // time (which also failed whenever the final URL was https).
                    const zipName = new URL(response.responseUrl).pathname.split("/").pop();
                    if (response.statusCode !== 200 || !zipName) {
                        console.error(`Skipping ${dlUrl}: status ${response.statusCode}, resolved to ${response.responseUrl}`);
                        response.resume(); // discard the body so the socket is released
                        return;
                    }
                    const filePath = `${category.location}/${zipName}`;
                    // download file
                    const file = fs.createWriteStream(filePath);
                    file.on('error', (err) => {
                        console.error(err);
                        response.destroy();
                    });
                    response.on('error', (err) => {
                        console.error(err);
                        file.destroy(); // may leave a partial file behind
                    });
                    response.pipe(file);
                }).on('error', (err) => {
                    console.error(err);
                });
            });
        }).catch((error) => {
            // handle error
            console.log(error);
        });
    }
}
/**
 * Map a zip entry path to its path segments below the archive's "cstrike" folder.
 *
 * The path is lower-cased and backslashes are treated as separators. Empty and
 * "." segments are dropped.
 *
 * @param {string} entryPath - Entry path as stored in the archive.
 * @returns {string[]|null} Segments relative to "cstrike", or null when the
 *   entry must be skipped: it belongs to a podbot/podbots folder, is not below
 *   a "cstrike" folder, has nothing after "cstrike", or contains a ".." segment
 *   (which would allow writing outside the extract directory).
 */
function cstrikeRelativeSegments(entryPath) {
    const segments = entryPath.toLowerCase().replace(/\\/g, "/").split("/");
    if (segments.includes("podbot") || segments.includes("podbots")) {
        return null;
    }
    const rootIndex = segments.indexOf("cstrike");
    if (rootIndex === -1) {
        return null;
    }
    const relative = segments
        .slice(rootIndex + 1)
        .filter((segment) => segment !== "" && segment !== ".");
    if (relative.length === 0 || relative.includes("..")) {
        return null;
    }
    return relative;
}

/**
 * Extract the "cstrike" content of every downloaded archive of a category.
 *
 * Lists `*.zip` in `data[inCategory].location` and, for every file entry that
 * lies below a "cstrike" folder (and not in a podbot/podbots folder), writes it
 * to `<__dirname>/extract/<lower-cased path below cstrike>`, creating
 * directories as needed. Errors are logged; a failing archive or entry does not
 * stop the others.
 *
 * @param {string} inCategory - Key of `data`.
 */
function unzipMaps(inCategory) {
    glob(`${data[inCategory].location}/*.zip`, {}, (err, files) => {
        if (err) {
            console.error(err);
            return;
        }
        console.log(files);
        files.forEach((zipPath) => {
            fs.createReadStream(zipPath)
                .on('error', (readErr) => {
                    console.error(`${zipPath}:`, readErr);
                })
                .pipe(unzipper.Parse())
                .on('entry', (entry) => {
                    // 'Directory' or 'File'
                    const relative = entry.type === 'File' ? cstrikeRelativeSegments(entry.path) : null;
                    if (relative === null) {
                        // Entries that are not consumed must be drained,
                        // otherwise the parser stops emitting further entries.
                        entry.autodrain();
                        return;
                    }
                    const fullDirPath = `${__dirname}/extract/${relative.slice(0, -1).join("/")}`;
                    const extractFilePath = `${__dirname}/extract/${relative.join("/")}`;
                    try {
                        fs.mkdirSync(fullDirPath, { recursive: true });
                    } catch (mkdirErr) {
                        console.error(`${zipPath}: ${entry.path}:`, mkdirErr);
                        entry.autodrain();
                        return;
                    }
                    entry.pipe(fs.createWriteStream(extractFilePath)).on('error', (writeErr) => {
                        console.error(`${extractFilePath}:`, writeErr);
                        // keep the archive stream moving after a failed write
                        entry.autodrain();
                    });
                })
                .on('error', (parseErr) => {
                    console.error(`${zipPath}:`, parseErr);
                });
        });
    });
}