#!/usr/bin/env perl

# https://cdn.rfxn.com/downloads/maldet-cleanv2.tgz
# https://cdn.rfxn.com/downloads/maldet-cleanv2.tgz.md5
# https://cdn.rfxn.com/downloads/maldet.current.hash
# https://cdn.rfxn.com/downloads/maldet.current.ver
# https://cdn.rfxn.com/downloads/maldet-sigpack.tgz
# https://cdn.rfxn.com/downloads/maldet-sigpack.tgz.md5
# https://cdn.rfxn.com/downloads/maldet.sigs.ver

use strict;
use warnings;

use Digest::MD5;
use Data::Dumper;
use LWP::UserAgent;
use Text::Match::FastAlternatives;

# Base directory for the signature pack: the pack is extracted into it and
# the signature files are then read from its "sigs" subdirectory.
my $sigs_dir = '/root/bin/sigs/';

# Chunk size used when reading scanned files; set once the hex signatures
# have been loaded.
my $read_length = 0;

# Signature data filled in by the loaders below.
my %maldet_md5;          # MD5 hex digest => signature description
my @maldet_hex;          # decoded byte patterns, longest first
my $maldet_hex_match;    # Text::Match::FastAlternatives matcher over @maldet_hex

# Scan results.
my @malicious_files;     # files whose MD5 digest is in %maldet_md5
my @compromised_files;   # files containing one of the byte patterns

# At least one path to scan is required.
unless (@ARGV) {
	print STDERR "Usage: $0 file1 [file2 ...]\n";
	exit 1;
}

# Refresh the signature pack when the published version differs from the
# version recorded in the locally unpacked pack.
my $sigs_ver_url  = 'https://cdn.rfxn.com/downloads/maldet.sigs.ver';
my $sigs_pack_url = 'https://cdn.rfxn.com/downloads/maldet-sigpack.tgz';

# Locally installed version; stays 0 when no version file is readable or the
# file is empty, which forces a download as long as the remote version is known.
my $ver = 0;
if (open(my $fd, '<', "$sigs_dir/sigs/maldet.sigs.ver")) {
	my $line = <$fd>;
	close($fd);
	if (defined($line)) {
		$line =~ s/\s+\z//;
		$ver = $line if length($line);
	}
}

# Published version. Backticks return undef when the command cannot be run.
my $v = `wget -O - -q $sigs_ver_url`;
my $fetch_status = $?;
$v = '' if !defined($v);
$v =~ s/\s+\z//;

if ($fetch_status != 0 || $v eq '') {
	# Without a remote version there is nothing to compare against; keep
	# whatever signatures are already on disk instead of blindly re-downloading.
	print STDERR "Could not fetch the current signature version, keeping the local signatures.\n";
}
elsif ($v ne $ver) {
	# Version identifiers are compared as strings: they are opaque tokens and
	# need not be valid numbers.
	my $status = system("wget -O - -q $sigs_pack_url | tar -C $sigs_dir/ -xz");
	if ($status != 0) {
		print STDERR "Failed to download or unpack the signature pack (status $status).\n";
	}
}

# Load MD5 signatures from the YARA rule file. Each hash = "..." line is
# stored in %maldet_md5 under the most recent description = "..." value
# seen before it.
my $yara_path = "$sigs_dir/sigs/rfxn.yara";
if (open(my $md5_db, '<', $yara_path)) {
	my $description;
	while (my $line = <$md5_db>) {
		chomp($line);
		if ($line =~ m!description\s*=\s*"([^"]+)"!) {
			$description = $1;
		}
		elsif ($line =~ m!hash\s*=\s*"([^"]+)"!) {
			# Digest::MD5 reports lowercase hex, so normalise the key. A hash
			# seen before any description still gets a defined, true value so
			# the later lookup cannot silently ignore it.
			$maldet_md5{lc($1)} = defined($description) ? $description : 'unknown';
		}
	}
	close($md5_db);
}
else {
	print STDERR "Failed to open md5 database file $yara_path: $!\n";
	exit 1;
}

# Load the colon-separated MD5 databases. In each line the first field is the
# digest and the third field is the signature name; the second field is not
# used. Files are read in this order, so an entry in a later file replaces an
# entry with the same digest from an earlier one.
foreach my $db_name ('md5v2.dat', 'rfxn.hdb') {
	my $db_path = "$sigs_dir/sigs/$db_name";

	my $md5_db;
	if (!open($md5_db, '<', $db_path)) {
		print STDERR "Failed to open md5 database file $db_path: $!\n";
		exit 1;
	}

	while (my $line = <$md5_db>) {
		chomp($line);
		my ($hash, undef, $name) = split(':', $line);

		# Blank or truncated lines carry no digest; skip them rather than
		# creating an entry under an undefined key.
		next if !defined($hash) || $hash eq '';

		# Digest::MD5 reports lowercase hex, so normalise the key, and make
		# sure the stored name is always defined and non-empty.
		$maldet_md5{lc($hash)} = (defined($name) && length($name)) ? $name : 'unknown';
	}
	close($md5_db);
}

# Load the hex signatures. The fourth colon-separated field of each line is a
# hex string; it is decoded into raw bytes and all decoded patterns are
# compiled into a single Text::Match::FastAlternatives matcher.
my $ndb_path = "$sigs_dir/sigs/rfxn.ndb";
if (open(my $hex_db, '<', $ndb_path)) {
	# The one regular-expression based signature we know about. It cannot be
	# used as a plain substring, so it is expanded by hand further down.
	my $known_regex_sig = '7773682e72756e28(22|27)73746172742f6d666f726d6174633a2f6175746f746573742f75(22|27)293b616c65727428(22|27)696d706f7274616e743a77696e646f7773697372656d6f76696e67756e7573656474656d706f7261727966696c65732e';

	my @temp;
	while (my $line = <$hex_db>) {
		chomp($line);
		next if $line eq '';

		my @fields = split(':', $line);
		my $code = $fields[3];
		if (!defined($code)) {
			# Fewer than four fields: there is no signature body to decode.
			print STDERR "Malformed hex: $line\n";
			next;
		}

		# Only plain hex with a whole number of bytes can be used as a
		# substring; wildcard or regular-expression signatures are skipped.
		if ($code !~ m!^[a-fA-F0-9]+$! || length($code) % 2) {
			if ($code ne $known_regex_sig) {
				print STDERR "Malformed hex: $code\n";
			}
			next;
		}

		push(@temp, pack('H*', $code));
	}
	close($hex_db);

	# The skipped signature allows either quote character (hex 22 or 27) at
	# three independent positions, so generate all eight combinations.
	my @quote_variants;
	foreach my $q1 ('"', "'") {
		foreach my $q2 ('"', "'") {
			foreach my $q3 ('"', "'") {
				push(@quote_variants,
					'wsh.run(' . $q1 . 'start/mformatc:/autotest/u' . $q2
					. ');alert(' . $q3 . 'important:windowsisremovingunusedtemporaryfiles.');
			}
		}
	}

	# Longest patterns first.
	@maldet_hex = sort { length($b) <=> length($a) } @temp, @quote_variants;

	$maldet_hex_match = Text::Match::FastAlternatives->new(@maldet_hex);
}
else {
	print STDERR "Failed to open hex database file $ndb_path: $!\n";
	exit 1;
}

# Files are scanned in chunks of $read_length bytes, with the previous chunk
# kept as overlap. Use 128 KiB, but never less than the longest pattern minus
# one byte, so a pattern straddling a chunk boundary is always fully present
# in the two-chunk window.
my $longest_pattern = @maldet_hex ? length($maldet_hex[0]) : 0;
$read_length = 128*1024;
if ($longest_pattern - 1 > $read_length) {
	$read_length = $longest_pattern - 1;
}

# Have find(1) walk the paths given on the command line. The list form of
# open passes the arguments without going through a shell.
open(my $file_names, '-|', "find", @ARGV, '-print0') || die "Could not run find: $!";

# find -print0 terminates every name with a NUL byte, so read NUL-delimited
# records from here on.
$/ = "\0";

# Scan every path reported by find. A file is reported as "Compromised" when
# it contains one of the byte patterns, and as "Malicious" when the MD5 digest
# of its complete contents is in %maldet_md5.
while (defined(my $filename = <$file_names>)) {
	chomp($filename);

	if (! -e $filename) {
		print STDERR "File $filename does not exist, skipping.\n";
		next;
	}

	# find also lists directories and special files; only regular files (or
	# symlinks to them) have contents worth scanning. "_" reuses the stat
	# information gathered by the -e test above.
	next if ! -f _;

	my $fh;
	if (! open($fh, '<', $filename)) {
		print STDERR "Failed to open $filename, skipping: $!\n";
		next;
	}
	# Signatures are byte patterns, so read raw bytes whatever the default
	# I/O layers are.
	binmode($fh);

	my $ctx = Digest::MD5->new();
	my $buffer = '';
	my $digest_complete = 0;    # set once the whole file has been hashed

	while (1) {
		my $chunk;
		my $bytes_read = read($fh, $chunk, $read_length);
		if (!defined($bytes_read)) {
			print STDERR "There was an error reading $filename: $!\n";
			last;
		}
		if ($bytes_read == 0) {
			# End of file: nothing new to match or hash.
			$digest_complete = 1;
			last;
		}

		# Check for code injection. Keep at most the previous $read_length
		# bytes as overlap so patterns crossing a chunk boundary are seen.
		if (length($buffer) > $read_length) {
			$buffer = substr($buffer, -$read_length);
		}
		$buffer .= $chunk;

		if ($maldet_hex_match->match($buffer)) {
			push(@compromised_files, $filename);
			print "Compromised: $filename\n";
			last;
		}

		# It is still worth using this slow version to find out what the
		# match location is:
		#
		#	my $malicious = 0;
		#	foreach my $hex (@maldet_hex) {
		#		if (index($buffer, $hex) >= 0) {
		#			push(@compromised_files, $filename);
		#			print "Compromised: $filename: $hex\n";
		#			$malicious = 1;
		#			last;
		#		}
		#	}
		#	last if $malicious;

		# Build MD5
		$ctx->add($chunk);

		if ($bytes_read < $read_length) {
			# A short read means the end of the file was reached.
			$digest_complete = 1;
			last;
		}
	}

	close($fh);

	# After a pattern match or a read error only part of the file was hashed;
	# looking that partial digest up would be meaningless.
	next if !$digest_complete;

	my $digest = $ctx->hexdigest();
	if (exists($maldet_md5{$digest})) {
		my $description = $maldet_md5{$digest};
		$description = 'unknown' if !defined($description);
		push(@malicious_files, $filename);
		print "Malicious: $filename: $description: $digest\n";
	}
}

#print Dumper \@malicious_files;
#print Dumper \@compromised_files;

close($file_names);

exit 0;
