init
This commit is contained in:
83
vstorrent_crawl (SFConflict zino@zinomedia.de 2020-10-22-15-04-00).pl
Executable file
83
vstorrent_crawl (SFConflict zino@zinomedia.de 2020-10-22-15-04-00).pl
Executable file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/perl
|
||||
use strict;
|
||||
use warnings;
|
||||
use Data::Dumper;
|
||||
use HTML::TreeBuilder::XPath;
|
||||
use feature qw(say);
|
||||
use LWP::UserAgent ();
|
||||
use URI;
|
||||
use URI::Escape qw( uri_unescape );
|
||||
use File::Basename;
|
||||
use Getopt::Long qw(GetOptions);
|
||||
|
||||
# Script-wide settings.
#   url       - overview page to crawl; filled in from the --url flag.
#   image_dir - directory the downloaded pictures are written to.
my %config = (
    'url'       => undef,
    'image_dir' => './images',
);

# One HTTP client shared by every request of the script. A browser-like
# agent string is sent instead of the libwww-perl default.
my $ua = LWP::UserAgent->new(
    timeout => 10,
    agent   => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
);

# Pick up proxy settings from the *_proxy environment variables.
$ua->env_proxy;

# Entry point: parse the command line first, then crawl. Plain calls with
# parentheses are used instead of the legacy &name() form.
GetCommandFlags();
GetPictures();
|
||||
|
||||
|
||||
# Crawl the overview page given via --url, follow every entry link found
# there and save the first content image of each detail page into
# $config{'image_dir'}, named after the detail page's title.
#
# Uses the file-level %config ('url', 'image_dir') and the shared $ua.
# Takes no arguments and returns nothing.
#
# Dies if the overview page cannot be fetched or the image directory cannot
# be created. A detail page or image that fails is reported with warn and
# skipped, so one bad entry does not abort the whole crawl.
sub GetPictures {
    my $image_dir = $config{'image_dir'};

    # Crawl the page requested on the command line instead of a fixed one.
    my $response = $ua->get( $config{'url'} );
    die $response->status_line unless $response->is_success;

    # Get @urls list from overview page. Links are resolved against the
    # page base so that relative hrefs work as well.
    my $tree = HTML::TreeBuilder::XPath->new_from_content( $response->decoded_content );
    my $overview_base = $response->base;
    my @urls = map { URI->new_abs( $_, $overview_base )->as_string }
        $tree->findvalues('//h2[@class="entry-title"]/a/@href');
    $tree->delete;    # only plain strings were extracted; release the tree
    print Dumper \@urls;

    # The download below cannot create the target directory on its own.
    unless ( -d $image_dir ) {
        mkdir $image_dir
            or die "Cannot create image directory '$image_dir': $!";
    }

    # Get detail page and fetch picture from there
    for my $url (@urls) {
        say "Getting $url";
        my $detail = $ua->get($url);
        unless ( $detail->is_success ) {
            warn "Skipping $url: " . $detail->status_line . "\n";
            next;
        }

        # Parse detail page. findvalues() is used for the image because
        # findvalue() would concatenate the src of every matching <img>.
        my $detail_tree = HTML::TreeBuilder::XPath->new_from_content( $detail->decoded_content );
        my ($img_src) = $detail_tree->findvalues(
            '//div[@class="entry-content"]/h2/img/@src|//div[@class="entry-content"]/p/img/@src'
        );
        my $img_title = $detail_tree->findvalue('//h1[@class="entry-title"]');
        $detail_tree->delete;

        $img_src   //= '';
        $img_title //= '';
        say "\t-> " . $img_title;
        say "\t-> " . $img_src;

        if ( $img_src eq '' ) {
            warn "No image found on $url\n";
            next;
        }

        # Save picture. The extension is taken from the URL path only, so a
        # query string never ends up in the file name.
        my $img_uri     = URI->new_abs( $img_src, $detail->base );
        my $last_in_url = uri_unescape( basename( $img_uri->path ) );
        my ($ext) = $last_in_url =~ /(\.[^.]+)\z/;
        $ext //= '';

        # Prefer the page title as file name; fall back to the name used in
        # the URL when the page has no title.
        $img_title =~ s/\A\s+|\s+\z//g;
        my $name = length $img_title ? "$img_title$ext" : $last_in_url;

        # Path separators and control characters must not reach the file
        # system as part of a single file name.
        $name =~ s{[/\\\x00-\x1f]+}{_}g;
        if ( $name eq '' ) {
            warn "Cannot derive a file name for $img_uri\n";
            next;
        }

        my $file = "$image_dir/$name";
        say "\t-> " . $file;

        my $res = $ua->get( $img_uri, ':content_file' => $file );
        if ( !$res->is_success ) {
            warn "Download of $img_uri failed: " . $res->status_line . "\n";
        }
        elsif ( defined( my $died = $res->header('X-Died') ) ) {
            # LWP reports errors while writing the content file this way.
            warn "Saving $file failed: $died\n";
        }
    }

    return;
}
|
||||
|
||||
|
||||
|
||||
# Parse the command line into the file-level %config.
#
# Recognised flags:
#   --url=<string>   stored in $config{'url'}; mandatory.
#
# Takes no arguments and returns nothing. Dies with a usage line when the
# options cannot be parsed, and with an error when --url is missing or
# empty.
sub GetCommandFlags {
    # GetOptions itself prints what was wrong; add the usage so the user
    # also sees how the script is meant to be called.
    GetOptions(
        'url=s' => \$config{'url'},
    ) or die "Usage: $0 --url <overview page url>\n";

    # Test for "defined and non-empty" rather than truthiness, so that a
    # value such as "0" is not mistaken for a missing flag.
    if ( !defined $config{'url'} || $config{'url'} eq '' ) {
        die "--url has no arguments.";
    }

    return;
}
|
||||
|
||||
# my $content = get('https://vstorrent.info/category/daw-pluginss/page/2/');
|
||||
# print Dumper $content;
|
||||
Reference in New Issue
Block a user