init
BIN
images/Ample Ethno Ukulele III v3.2 [WIN & MACOSX].jpg
Executable file
|
After Width: | Height: | Size: 42 KiB |
BIN
images/Ample Guitar LP v3.2 [WIN & MACOSX].jpg
Executable file
|
After Width: | Height: | Size: 43 KiB |
BIN
images/StudioLinked Dope FX v1.0 [WIN MACOSX].jpg
Executable file
|
After Width: | Height: | Size: 40 KiB |
BIN
images/StudioLinked Reverse Station v1.0 [WIN MACOSX].png
Executable file
|
After Width: | Height: | Size: 105 KiB |
BIN
images/UJAM Beatmaker Bundle 2 VST2 AAX [WIN].jpg
Executable file
|
After Width: | Height: | Size: 26 KiB |
BIN
images/UJAM Finisher Bundle VST AAX [WIN].jpg
Executable file
|
After Width: | Height: | Size: 56 KiB |
BIN
images/UJAM Virtual Bassist DANDY v2.1.1 VST AAX [WIN].jpg
Executable file
|
After Width: | Height: | Size: 43 KiB |
BIN
images/UJAM Virtual Bassist MELLOW v2.1.1 VST AAX [WIN].jpg
Executable file
|
After Width: | Height: | Size: 40 KiB |
BIN
images/UJAM Virtual Bassist ROWDY v2.1.1 VST AAX [WIN].jpg
Executable file
|
After Width: | Height: | Size: 40 KiB |
BIN
images/UJAM Virtual Bassist ROYAL v2.1.1 VST AAX [WIN].jpg
Executable file
|
After Width: | Height: | Size: 41 KiB |
0
processed/jul-aug-note
Executable file
588
singleFile/1.html
Executable file
46
vstorrent.pl
Executable file
@@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/perl
# Extracts the inline base64 (data URI) thumbnail images found in the HTML
# pages under ./singleFile and writes them as image files into ./images.
use strict;
use warnings;
use Data::Dumper;
use Path::Tiny qw(path);
use MIME::Base64 qw(decode_base64);
use open ':std', ':encoding(UTF-8)';

# Directory that holds the saved HTML pages to scan.
my $dir = './singleFile';

# Every *.html file in that directory; read by getImagesfromHTML().
my @files = glob ( "$dir/*.html" );

getImagesfromHTML();
|
||||||
|
|
||||||
|
# Scans every HTML file listed in the file-level @files array, finds each
# <div class=excerpt-thumb> element, decodes the base64 data-URI image it
# contains and writes the image to ./images/<title>.<filetype>.
#
# Takes no arguments; the return value is not meaningful.
# Dies if an output file cannot be opened, written or closed.  Thumbnails
# that lack a title or an inline base64 image are skipped with a warning.
sub getImagesfromHTML {
    Delimiter((caller(0))[3]);

    for my $filename (@files) {
        my $content = path($filename)->slurp_utf8;

        # Remove all line breaks so the non-greedy matches below can span
        # markup that was originally spread over several lines.
        $content =~ s/\R//g;

        while ($content =~ m/<div class=excerpt-thumb>(.+?)<\/div>/g) {
            my $dataset = $1;

            # Every match is checked: after a failed match $1 would still
            # hold the previous capture and stale data would be reused.
            my ($title) = $dataset =~ m/title="(.+?)"/;
            if (!defined $title) {
                warn "$filename: thumbnail without title attribute, skipped\n";
                next;
            }
            $title =~ s/Permalink to //;

            # The title becomes a file name, so neutralise path separators
            # and control characters to keep the output inside ./images.
            $title =~ s{[/\\\x00-\x1f]}{_}g;
            if (!length $title) {
                warn "$filename: thumbnail with empty title, skipped\n";
                next;
            }

            my ($src) = $dataset =~ m/src=(.+?) /;
            if (!defined $src) {
                warn "$filename: no src attribute for '$title', skipped\n";
                next;
            }

            # Split the data URI into its image subtype and base64 payload.
            my ($filetype, $payload) = $src =~ m/data:image\/(.+?)\;base64,(.*)/;
            if (!defined $filetype) {
                warn "$filename: src of '$title' is not a base64 data URI, skipped\n";
                next;
            }
            $filetype =~ s{[/\\\x00-\x1f]}{_}g;

            my $decoded = decode_base64($payload);

            # ':raw' keeps the default UTF-8 layer off the binary image data.
            my $out_path = "./images/$title.$filetype";
            open my $fh, '>:raw', $out_path
                or die "Cannot open $out_path for writing: $!";
            print {$fh} $decoded
                or die "Cannot write to $out_path: $!";
            close $fh
                or die "Cannot close $out_path: $!";

            print "Title: $title\nFiletype: $filetype\n\n";
        }
    }

    return;
}
|
||||||
|
|
||||||
|
# Prints a banner to STDOUT: an empty line, a rule of 80 dashes, the line
# "SUB <name>" and a second rule of 80 dashes.
#
# Parameter: the name to show in the banner (first positional argument).
sub Delimiter {
    my ($label) = @_;
    my $rule = '-' x 80;
    print "\n$rule\nSUB $label\n$rule\n";
}
|
||||||
83
vstorrent_crawl (SFConflict zino@zinomedia.de 2020-10-22-15-04-00).pl
Executable file
@@ -0,0 +1,83 @@
|
|||||||
|
#!/usr/bin/perl
# Crawls an overview page, follows the post links on it and saves the image
# found on each post page into the configured image directory.
use strict;
use warnings;
use Data::Dumper;
use HTML::TreeBuilder::XPath;
use feature qw(say);
use LWP::UserAgent ();
use URI;
use URI::Escape qw( uri_unescape );
use File::Basename;
use Getopt::Long qw(GetOptions);

# Runtime settings: 'url' is filled from --url by GetCommandFlags(),
# 'image_dir' is the directory GetPictures() writes images into.
my %config = (
    'url'       => undef,
    'image_dir' => './images',
);

# Shared HTTP client: 10 second timeout, browser-like User-Agent string and
# proxy settings taken from the environment.
my $ua = LWP::UserAgent->new(timeout => 10, agent => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36');
$ua->env_proxy;

GetCommandFlags();
GetPictures();
|
||||||
|
|
||||||
|
|
||||||
|
# Downloads the first content image of every post linked from an overview
# page.
#
# Fetches the overview page, collects the links found under
# h2.entry-title, then for each linked page takes the first <img> that sits
# in an <h2> or <p> directly below div.entry-content and stores it as
# "<image_dir>/<page title><extension>".
#
# Takes no arguments; reads the file-level %config and $ua.
# Dies with the HTTP status line if the overview page cannot be fetched, or
# if the image directory cannot be created.  A problem with an individual
# post is reported with warn() and that post is skipped.
sub GetPictures {
    # --url is mandatory (GetCommandFlags dies without it) but was never
    # used; honour it and keep the old hard-coded page only as a fallback.
    my $overview_url = $config{'url'}
        // 'https://vstorrent.info/category/daw-pluginss/page/2/';

    my $response = $ua->get($overview_url);
    die $response->status_line unless $response->is_success;

    # Get @urls list from overview page
    my $tree = HTML::TreeBuilder::XPath->new_from_content($response->decoded_content);
    my @urls = $tree->findvalues('//h2[@class="entry-title"]/a/@href');
    $tree->delete;    # release the parsed tree explicitly
    print Dumper \@urls;

    my $image_dir = $config{'image_dir'};
    if (!-d $image_dir) {
        mkdir $image_dir or die "Cannot create $image_dir: $!";
    }

    # Get detail page and fetch picture from there
    URL: for my $url (@urls) {
        say "Getting $url";
        my $response2 = $ua->get($url);
        if (!$response2->is_success) {
            warn "Skipping $url: " . $response2->status_line . "\n";
            next URL;
        }

        # Parse detail page.  findvalue() joins the values of ALL matching
        # nodes into one string, so take the first match via findvalues().
        my $tree2 = HTML::TreeBuilder::XPath->new_from_content($response2->decoded_content);
        my ($img_src) = $tree2->findvalues('//div[@class="entry-content"]/h2/img/@src|//div[@class="entry-content"]/p/img/@src');
        my $img_title = $tree2->findvalue('//h1[@class="entry-title"]');
        $tree2->delete;

        if (!defined $img_src || $img_src eq '') {
            warn "Skipping $url: no image found\n";
            next URL;
        }

        say "\t-> " . $img_title;
        say "\t-> " . $img_src;

        # Resolve relative image links against the page they came from.
        my $img_uri = URI->new_abs($img_src, $response2->base);

        # Take the extension from the URL path only, so a query string or
        # fragment never ends up in the file name.
        my ($ext) = $img_uri->path =~ m{(\.[^./]+)\z};
        $ext //= '';

        # The title becomes a file name: trim it and neutralise path
        # separators and control characters.
        (my $safe_title = $img_title) =~ s{\A\s+|\s+\z}{}g;
        $safe_title =~ s{[/\\\x00-\x1f]}{_}g;
        if ($safe_title eq '') {
            warn "Skipping $url: page has no title to name the image\n";
            next URL;
        }

        # Save picture
        my $file = "$image_dir/$safe_title$ext";
        say "\t-> " . $file;

        my $res = $ua->get($img_uri, ':content_file' => $file);
        if (!$res->is_success) {
            warn "Download of $img_uri failed: " . $res->status_line . "\n";
        }
        elsif (my $died = $res->header('X-Died')) {
            # LWP reports errors while writing the file in this header.
            warn "Saving $file failed: $died\n";
        }
    }

    return;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Parses the command line into the file-level %config hash.
#
# Recognised option: --url <string>, stored in $config{'url'}.
# Dies when option parsing fails, or when --url is missing or has a false
# value.
sub GetCommandFlags {
    my $parsed_ok = GetOptions( 'url=s' => \$config{'url'} );
    die unless $parsed_ok;

    die "--url has no arguments." unless $config{'url'};
}
|
||||||
|
|
||||||
|
# my $content = get('https://vstorrent.info/category/daw-pluginss/page/2/');
|
||||||
|
# print Dumper $content;
|
||||||
83
vstorrent_crawl.pl
Executable file
@@ -0,0 +1,83 @@
|
|||||||
|
#!/usr/bin/perl
# Crawls an overview page, follows the post links on it and saves the image
# found on each post page into the configured image directory.
use strict;
use warnings;
use Data::Dumper;
use HTML::TreeBuilder::XPath;
use feature qw(say);
use LWP::UserAgent ();
use URI;
use URI::Escape qw( uri_unescape );
use File::Basename;
use Getopt::Long qw(GetOptions);

# Runtime settings: 'url' is filled from --url by GetCommandFlags(),
# 'image_dir' is the directory GetPictures() writes images into.
my %config = (
    'url'       => undef,
    'image_dir' => './images',
);

# Shared HTTP client: 10 second timeout, browser-like User-Agent string and
# proxy settings taken from the environment.
my $ua = LWP::UserAgent->new(timeout => 10, agent => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36');
$ua->env_proxy;

GetCommandFlags();
GetPictures();
|
||||||
|
|
||||||
|
|
||||||
|
# Downloads the first content image of every post linked from an overview
# page.
#
# Fetches the overview page, collects the links found under
# h2.entry-title, then for each linked page takes the first <img> that sits
# in an <h2> or <p> directly below div.entry-content and stores it as
# "<image_dir>/<page title><extension>".
#
# Takes no arguments; reads the file-level %config and $ua.
# Dies with the HTTP status line if the overview page cannot be fetched, or
# if the image directory cannot be created.  A problem with an individual
# post is reported with warn() and that post is skipped.
sub GetPictures {
    # --url is mandatory (GetCommandFlags dies without it) but was never
    # used; honour it and keep the old hard-coded page only as a fallback.
    my $overview_url = $config{'url'}
        // 'https://vstorrent.info/category/daw-pluginss/page/2/';

    my $response = $ua->get($overview_url);
    die $response->status_line unless $response->is_success;

    # Get @urls list from overview page
    my $tree = HTML::TreeBuilder::XPath->new_from_content($response->decoded_content);
    my @urls = $tree->findvalues('//h2[@class="entry-title"]/a/@href');
    $tree->delete;    # release the parsed tree explicitly
    print Dumper \@urls;

    my $image_dir = $config{'image_dir'};
    if (!-d $image_dir) {
        mkdir $image_dir or die "Cannot create $image_dir: $!";
    }

    # Get detail page and fetch picture from there
    URL: for my $url (@urls) {
        say "Getting $url";
        my $response2 = $ua->get($url);
        if (!$response2->is_success) {
            warn "Skipping $url: " . $response2->status_line . "\n";
            next URL;
        }

        # Parse detail page.  findvalue() joins the values of ALL matching
        # nodes into one string, so take the first match via findvalues().
        my $tree2 = HTML::TreeBuilder::XPath->new_from_content($response2->decoded_content);
        my ($img_src) = $tree2->findvalues('//div[@class="entry-content"]/h2/img/@src|//div[@class="entry-content"]/p/img/@src');
        my $img_title = $tree2->findvalue('//h1[@class="entry-title"]');
        $tree2->delete;

        if (!defined $img_src || $img_src eq '') {
            warn "Skipping $url: no image found\n";
            next URL;
        }

        say "\t-> " . $img_title;
        say "\t-> " . $img_src;

        # Resolve relative image links against the page they came from.
        my $img_uri = URI->new_abs($img_src, $response2->base);

        # Take the extension from the URL path only, so a query string or
        # fragment never ends up in the file name.
        my ($ext) = $img_uri->path =~ m{(\.[^./]+)\z};
        $ext //= '';

        # The title becomes a file name: trim it and neutralise path
        # separators and control characters.
        (my $safe_title = $img_title) =~ s{\A\s+|\s+\z}{}g;
        $safe_title =~ s{[/\\\x00-\x1f]}{_}g;
        if ($safe_title eq '') {
            warn "Skipping $url: page has no title to name the image\n";
            next URL;
        }

        # Save picture
        my $file = "$image_dir/$safe_title$ext";
        say "\t-> " . $file;

        my $res = $ua->get($img_uri, ':content_file' => $file);
        if (!$res->is_success) {
            warn "Download of $img_uri failed: " . $res->status_line . "\n";
        }
        elsif (my $died = $res->header('X-Died')) {
            # LWP reports errors while writing the file in this header.
            warn "Saving $file failed: $died\n";
        }
    }

    return;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Parses the command line into the file-level %config hash.
#
# Recognised option: --url <string>, stored in $config{'url'}.
# Dies when option parsing fails, or when --url is missing or has a false
# value.
sub GetCommandFlags {
    my $parsed_ok = GetOptions( 'url=s' => \$config{'url'} );
    die unless $parsed_ok;

    die "--url has no arguments." unless $config{'url'};
}
|
||||||
|
|
||||||
|
# my $content = get('https://vstorrent.info/category/daw-pluginss/page/2/');
|
||||||
|
# print Dumper $content;
|
||||||