diff --git a/scripts/0clamav.sh b/scripts/0clamav.sh
index 15a7433..47720c3 100644
--- a/scripts/0clamav.sh
+++ b/scripts/0clamav.sh
@@ -10,3 +10,7 @@ cp /var/lib/clamav/main.c*d .
cp /var/lib/clamav/daily.c*d .
sigtool -u main.c*d
sigtool -u daily.c*d
+# Move files out of /tmp/mss (populated elsewhere; not visible here): *.fp* -> exclusions/, *.hsb and *.hdb -> raw/.
+mv /tmp/mss/*.fp* exclusions/
+mv /tmp/mss/*.hsb raw/
+mv /tmp/mss/*.hdb raw/
diff --git a/scripts/0genbloom.sh b/scripts/0genbloom.sh
index 8e8dbd7..c7fbf6c 100644
--- a/scripts/0genbloom.sh
+++ b/scripts/0genbloom.sh
@@ -11,3 +11,8 @@ cat extended.html >> production/index.html
mv raw/hypatia-*-bloom.bin production/
sed -s 'N;/md5: 0, sha1: 0, sha256: 0/!P;D' -i production/index.html
+# In-place pass over production/index.html for lines containing "Skipping" but not "virustotal"; one pass each for run lengths 64, 40 and 32.
+# NOTE(review): as written the replacement is "&" (the match itself) and [^ -~] excludes printable ASCII, so these appear to be no-ops on hex hashes - confirm the expressions were not garbled.
+sed -r '/Skipping/{/virustotal/!s~[^ -~]{64}+~&~}' -i production/index.html
+sed -r '/Skipping/{/virustotal/!s~[^ -~]{40}+~&~}' -i production/index.html
+sed -r '/Skipping/{/virustotal/!s~[^ -~]{32}+~&~}' -i production/index.html
diff --git a/scripts/0malshare-bulk.sh b/scripts/0malshare-bulk.sh
new file mode 100644
index 0000000..d1c2187
--- /dev/null
+++ b/scripts/0malshare-bulk.sh
@@ -0,0 +1,20 @@
+# Print one MalShare daily file-list URL per line for every date from
+# startdate through enddate, inclusive. Dates are YYYY-MM-DD.
+
+# Alternative (commented-out) range:
+#startdate=2013-04-06
+#enddate=2017-09-11
+
+startdate=2017-09-14
+enddate=2024-04-10
+
+curr="$startdate"
+while true; do
+  #echo "https://malshare.com/daily/_disabled/$curr/malshare_fileList.$curr.txt"
+  echo "https://malshare.com/daily/$curr/malshare_fileList.$curr.txt"
+  # YYYY-MM-DD strings order the same way as the dates they name.
+  [ "$curr" \< "$enddate" ] || break
+  # Step in UTC so a local DST change cannot skip or reject a day, and stop
+  # if date fails: an empty $curr would otherwise keep the loop running.
+  curr=$( date -u +%Y-%m-%d --date "$curr +1 day" ) || exit 1
+done
diff --git a/scripts/0malshare-combine.sh b/scripts/0malshare-combine.sh
new file mode 100644
index 0000000..87b9601
--- /dev/null
+++ b/scripts/0malshare-combine.sh
@@ -0,0 +1,11 @@
+# Merge the malshare_fileList.<year>-* lists into one sorted, de-duplicated file per year.
+
+# Lists under malshare-md5/ are written to raw-extended/ as .md5 files.
+for year in 2013 2014 2015 2016 2017; do
+  sort -u malshare-md5/malshare_fileList."$year"-*.txt > raw-extended/malshare-"$year".md5
+done
+
+# Lists under malshare-sha256/ are written to raw/ as .sha256 files.
+for year in 2017 2018 2019 2020 2021 2022 2023 2024; do
+  sort -u malshare-sha256/malshare_fileList."$year"-*.sha256.txt > raw/malshare-"$year".sha256
+done
diff --git a/scripts/0threatview.sh b/scripts/0threatview.sh
index d4df9f4..165c895 100644
--- a/scripts/0threatview.sh
+++ b/scripts/0threatview.sh
@@ -1,4 +1,4 @@
-wget "https://threatview.io/Downloads/MD5-HASH-ALL.txt" -O - >> raw/threatview.md5
+wget "https://threatview.io/Downloads/MD5-HASH-ALL.txt" -O - | sed 's/MD5 of //' >> raw/threatview.md5 # drop the first literal "MD5 of " on each line before appending
wget "https://threatview.io/Downloads/SHA-HASH-FEED.txt" -O - >> raw/threatview.sha1
sort -u -o raw/threatview.md5 raw/threatview.md5
diff --git a/scripts/Main.java b/scripts/Main.java
index 5c53bb0..5346c37 100644
--- a/scripts/Main.java
+++ b/scripts/Main.java
@@ -32,6 +32,8 @@ import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+//import org.sqlite.*;
+//import java.sql.*;
public class Main {
@@ -57,6 +59,7 @@ public class Main {
private static ArrayList arrExclusions = new ArrayList();
public static void main(String[] args) {
+ //isFileInNsrl("B61905308B336AD268A782790B661616");
signaturesMD5 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 6000000, 0.00001); //6m
signaturesSHA1 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 50000, 0.00001); //50k
signaturesSHA256 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 2000000, 0.00001); //2m
@@ -178,6 +181,9 @@ public class Main {
System.out.println("\t\tSkipping excluded hash: " + potentialHash);
return;
}
+ //if(isFileInNsrl(potentialHash)) {
+ // return;
+ //}
if (potentialHash.length() == 32) {
if (signaturesMD5.put(potentialHash)) {
amtSignaturesAddedMD5++;
@@ -206,4 +212,51 @@ public class Main {
}
}
}
+
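+ //SQL index statements for the FILE table queried by the disabled isFileInNsrl() below: CREATE INDEX hashIndexMD5 ON FILE (md5); CREATE INDEX hashIndexSHA1 ON FILE (sha1); CREATE INDEX hashIndexSHA256 ON FILE (sha256);
+ //Presumably an alternative, single combined index: CREATE INDEX hashIndex ON FILE (md5,sha1,sha256);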
+ /*private static String url = "jdbc:sqlite:RDS_2024.03.1_android_minimal.db";
+ private static Connection connection = null;
+ private static Statement statement = null;
+ private static boolean isFileInNsrl(String hash) {
+ String hashType = null;
+ switch(hash.length()) {
+ case 32:
+ hashType = "md5";
+ break;
+ case 40:
+ hashType ="sha1";
+ break;
+ case 64:
+ hashType ="sha256";
+ break;
+ default:
+ return false;
+ }
+ String sql = "select package_id,md5,sha1,sha256 from FILE where " + hashType + " = '" + hash.toUpperCase() + "';";
+ try {
+ if(connection == null || statement == null) {
+ SQLiteConfig config = new SQLiteConfig();
+ config.setReadOnly(true);
+ config.setSharedCache(true);
+ connection = DriverManager.getConnection(url, config.toProperties());
+ statement = connection.createStatement();
+ }
+ ResultSet rs = statement.executeQuery(sql);
+ if(rs.next()) {
+ System.out.println("\t\tSkipping excluded NSRL hash: "+ rs.getString("md5").toLowerCase());
+ System.out.println("\t\tSkipping excluded NSRL hash: " + rs.getString("sha1").toLowerCase());
+ System.out.println("\t\tSkipping excluded NSRL hash: " + rs.getString("sha256").toLowerCase());
+ ResultSet rsPkg = statement.executeQuery("select name from PKG where package_id = '" + rs.getString("package_id") + "';");
+ if (rsPkg.next()) {
+ System.out.println("\t\t\tFrom: " + rsPkg.getString("name"));
+ }
+ return true;
+ }
+ return false;
+ } catch (SQLException e) {
+ e.printStackTrace();
+ }
+ return false;
+ }*/
}