This commit is contained in:
2022-10-23 01:58:47 +02:00
parent bf56a1e0ac
commit 2413b25cf2
15 changed files with 800 additions and 0 deletions

View File

@@ -0,0 +1,83 @@
#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;
use HTML::TreeBuilder::XPath;
use feature qw(say);
use LWP::UserAgent ();
use URI;
use URI::Escape qw( uri_unescape );
use File::Basename;
use Getopt::Long qw(GetOptions);
# Run-time settings shared by the subs below.
#   url       - overview page to scrape; filled in from the --url option.
#   image_dir - directory the downloaded pictures are written to.
my %config = (
    'url'       => undef,
    'image_dir' => './images',
);

# One HTTP client for every request: 10 second timeout and a browser-like
# User-Agent string.
my $ua = LWP::UserAgent->new(
    timeout => 10,
    agent   => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
);

# Pick up proxy settings from the environment (e.g. http_proxy).
$ua->env_proxy;

# Plain calls instead of the legacy &Name() form.
GetCommandFlags();
GetPictures();
# Download the first content image of every post linked from an overview page.
#
# Fetches the page at $config{'url'}, collects the post links matched by
# //h2[@class="entry-title"]/a/@href, then fetches each post and stores its
# first content image in $config{'image_dir'} as "<post title><extension>".
#
# Takes no arguments and returns nothing; reads the file-level %config and $ua.
# Dies when no URL is configured, when the overview page cannot be fetched or
# when the image directory cannot be created. A problem with a single post is
# reported with warn and that post is skipped.
sub GetPictures {
    my $overview_url = $config{'url'};
    my $image_dir    = $config{'image_dir'};

    die "No overview URL configured (use --url).\n"
        unless defined $overview_url && length $overview_url;

    my $response = $ua->get($overview_url);
    die "Cannot fetch $overview_url: " . $response->status_line . "\n"
        unless $response->is_success;

    # Get @urls list from overview page
    my $tree = HTML::TreeBuilder::XPath->new_from_content($response->decoded_content);
    my @urls = $tree->findvalues('//h2[@class="entry-title"]/a/@href');
    $tree->delete;    # release the parse tree as soon as the values are extracted
    print Dumper \@urls;

    # Make sure the target directory exists before the first download.
    # Loaded here so this sub does not rely on the file's import list.
    require File::Path;
    File::Path::make_path($image_dir);
    die "Cannot create image directory $image_dir\n" unless -d $image_dir;

    # Get detail page and fetch picture from there
    POST:
    for my $url (@urls) {
        say "Getting $url";
        my $response2 = $ua->get($url);
        unless ($response2->is_success) {
            warn "\t-> skipped: " . $response2->status_line . "\n";
            next POST;
        }

        # Parse detail page. findvalues + list assignment picks the first
        # image; findvalue would concatenate the src of every match.
        my $tree2 = HTML::TreeBuilder::XPath->new_from_content($response2->decoded_content);
        my ($img_src) = $tree2->findvalues(
            '//div[@class="entry-content"]/h2/img/@src|//div[@class="entry-content"]/p/img/@src'
        );
        my $img_title = $tree2->findvalue('//h1[@class="entry-title"]');
        $tree2->delete;

        unless (defined $img_src && length $img_src) {
            warn "\t-> skipped: no image found\n";
            next POST;
        }

        # Resolve relative src attributes against the page they came from and
        # ignore anything that is not a plain web URL (e.g. data: URIs).
        my $img_uri = URI->new_abs($img_src, $response2->base);
        my $scheme  = $img_uri->scheme // '';
        unless ($scheme =~ /\Ahttps?\z/) {
            warn "\t-> skipped: unsupported image URL $img_src\n";
            next POST;
        }

        say "\t-> " . $img_title;
        say "\t-> " . $img_uri;

        # Take the extension from the URL path only, so a query string such
        # as "?w=300" never ends up in the file name.
        my $last_in_url = uri_unescape( basename( $img_uri->path ) );
        my ($ext) = $last_in_url =~ /(\.[^.]+)\z/;
        $ext //= '';

        # File name: the post title, or the URL's base name when the page has
        # no title.
        my $name = $img_title;
        $name =~ s/\A\s+|\s+\z//g;
        unless (length $name) {
            $name = $last_in_url;
            $name =~ s/\Q$ext\E\z// if length $ext;
            $name = 'image' unless length $name;
        }

        # Path separators and control characters must not reach the file system.
        my $file_name = $name . $ext;
        $file_name =~ s{[/\\\x00-\x1f]}{_}g;

        my $file = $image_dir . "/$file_name";
        say "\t-> " . $file;

        # Save picture straight to disk and report failures instead of
        # ignoring them. X-Died is set by LWP when writing the file failed.
        my $res = $ua->get($img_uri, ':content_file' => $file);
        if (!$res->is_success) {
            warn "\t-> download failed: " . $res->status_line . "\n";
        }
        elsif (defined( my $died = $res->header('X-Died') )) {
            warn "\t-> could not write $file: $died\n";
        }
    }

    return;
}
# Parse the command line into the file-level %config.
#
# Options:
#   --url <address>      overview page to scrape (required)
#   --image_dir <path>   where pictures are stored (optional; keeps the value
#                        already present in %config when omitted)
#
# Takes no arguments and returns nothing. Dies with a usage message when an
# option cannot be parsed or when --url is missing or empty.
sub GetCommandFlags {
    my $usage = "Usage: $0 --url <overview page URL> [--image_dir <directory>]\n";

    GetOptions(
        'url=s'       => \$config{'url'},
        'image_dir=s' => \$config{'image_dir'},
    ) or die $usage;

    # Test for defined-and-non-empty rather than plain truthiness.
    unless (defined $config{'url'} && length $config{'url'}) {
        die "--url has no arguments.\n" . $usage;
    }

    return;
}
# my $content = get('https://vstorrent.info/category/daw-pluginss/page/2/');
# print Dumper $content;