# Patch overview (free text placed ahead of the first "diff --git" header).
# NOTE(review): line breaks inside the hunks below appear to have been collapsed to spaces; hunk line counts cannot be checked against the content in this form - confirm against the original commit before applying.
#
# scripts/0clamav.sh: after the existing cp/sigtool steps, moves /tmp/mss/*.fp* into exclusions/ and /tmp/mss/*.hsb and /tmp/mss/*.hdb into raw/.
# scripts/0genbloom.sh: adds three in-place sed passes over production/index.html. Each acts only on lines that contain "Skipping" and do not contain "virustotal", and targets runs of 64, 40 and 32 characters matched by the bracket expression [^ -~].
#   NOTE(review): as written the replacement is just "&", which puts the matched text back unchanged, so these passes would not alter the file; the intended replacement text may be missing here - verify.
# scripts/0malshare-bulk.sh (new file): prints one malshare daily file-list URL per day, starting at startdate and ending at enddate inclusive (the URL is echoed before the end-of-range test). The earlier date range and the "_disabled" URL form are kept as comments.
# scripts/0malshare-combine.sh (new file): uses sort -u to merge the per-day lists into one file per year - MD5 lists for 2013-2017 go to raw-extended/, SHA-256 lists for 2017-2024 go to raw/.
# scripts/0threatview.sh: pipes the downloaded MD5 feed through sed to remove the first "MD5 of " on each line before appending to raw/threatview.md5.
# scripts/Main.java: every added line is commented out (the org.sqlite / java.sql imports, the two isFileInNsrl call sites, and the isFileInNsrl method with its JDBC fields), so this hunk adds no executable Java.
#   NOTE(review): the commented-out isFileInNsrl builds both SQL queries by string concatenation; if it is ever enabled, consider switching to PreparedStatement with bound parameters.
diff --git a/scripts/0clamav.sh b/scripts/0clamav.sh index 15a7433..47720c3 100644 --- a/scripts/0clamav.sh +++ b/scripts/0clamav.sh @@ -10,3 +10,7 @@ cp /var/lib/clamav/main.c*d . cp /var/lib/clamav/daily.c*d . sigtool -u main.c*d sigtool -u daily.c*d + +mv /tmp/mss/*.fp* exclusions/ +mv /tmp/mss/*.hsb raw/ +mv /tmp/mss/*.hdb raw/ diff --git a/scripts/0genbloom.sh b/scripts/0genbloom.sh index 8e8dbd7..c7fbf6c 100644 --- a/scripts/0genbloom.sh +++ b/scripts/0genbloom.sh @@ -11,3 +11,8 @@ cat extended.html >> production/index.html mv raw/hypatia-*-bloom.bin production/ sed -s 'N;/md5: 0, sha1: 0, sha256: 0/!P;D' -i production/index.html + + +sed -r '/Skipping/{/virustotal/!s~[^ -~]{64}+~&~}' -i production/index.html +sed -r '/Skipping/{/virustotal/!s~[^ -~]{40}+~&~}' -i production/index.html +sed -r '/Skipping/{/virustotal/!s~[^ -~]{32}+~&~}' -i production/index.html diff --git a/scripts/0malshare-bulk.sh b/scripts/0malshare-bulk.sh new file mode 100644 index 0000000..d1c2187 --- /dev/null +++ b/scripts/0malshare-bulk.sh @@ -0,0 +1,13 @@ +#startdate=2013-04-06 +#enddate=2017-09-11 + +startdate=2017-09-14 +enddate=2024-04-10 + +curr="$startdate" +while true; do + #echo "https://malshare.com/daily/_disabled/$curr/malshare_fileList.$curr.txt" + echo "https://malshare.com/daily/$curr/malshare_fileList.$curr.txt" + [ "$curr" \< "$enddate" ] || break + curr=$( date +%Y-%m-%d --date "$curr +1 day" ) +done diff --git a/scripts/0malshare-combine.sh b/scripts/0malshare-combine.sh new file mode 100644 index 0000000..87b9601 --- /dev/null +++ b/scripts/0malshare-combine.sh @@ -0,0 +1,14 @@ +sort -u malshare-md5/malshare_fileList.2013-*.txt > raw-extended/malshare-2013.md5 +sort -u malshare-md5/malshare_fileList.2014-*.txt > raw-extended/malshare-2014.md5 +sort -u malshare-md5/malshare_fileList.2015-*.txt > raw-extended/malshare-2015.md5 +sort -u malshare-md5/malshare_fileList.2016-*.txt > raw-extended/malshare-2016.md5 +sort -u malshare-md5/malshare_fileList.2017-*.txt > 
raw-extended/malshare-2017.md5 + +sort -u malshare-sha256/malshare_fileList.2017-*.sha256.txt > raw/malshare-2017.sha256 +sort -u malshare-sha256/malshare_fileList.2018-*.sha256.txt > raw/malshare-2018.sha256 +sort -u malshare-sha256/malshare_fileList.2019-*.sha256.txt > raw/malshare-2019.sha256 +sort -u malshare-sha256/malshare_fileList.2020-*.sha256.txt > raw/malshare-2020.sha256 +sort -u malshare-sha256/malshare_fileList.2021-*.sha256.txt > raw/malshare-2021.sha256 +sort -u malshare-sha256/malshare_fileList.2022-*.sha256.txt > raw/malshare-2022.sha256 +sort -u malshare-sha256/malshare_fileList.2023-*.sha256.txt > raw/malshare-2023.sha256 +sort -u malshare-sha256/malshare_fileList.2024-*.sha256.txt > raw/malshare-2024.sha256 diff --git a/scripts/0threatview.sh b/scripts/0threatview.sh index d4df9f4..165c895 100644 --- a/scripts/0threatview.sh +++ b/scripts/0threatview.sh @@ -1,4 +1,4 @@ -wget "https://threatview.io/Downloads/MD5-HASH-ALL.txt" -O - >> raw/threatview.md5 +wget "https://threatview.io/Downloads/MD5-HASH-ALL.txt" -O - | sed 's/MD5 of //' >> raw/threatview.md5 wget "https://threatview.io/Downloads/SHA-HASH-FEED.txt" -O - >> raw/threatview.sha1 sort -u -o raw/threatview.md5 raw/threatview.md5 diff --git a/scripts/Main.java b/scripts/Main.java index 5c53bb0..5346c37 100644 --- a/scripts/Main.java +++ b/scripts/Main.java @@ -32,6 +32,8 @@ import java.util.stream.Stream; import java.util.zip.GZIPInputStream; import java.util.regex.Matcher; import java.util.regex.Pattern; +//import org.sqlite.*; +//import java.sql.*; public class Main { @@ -57,6 +59,7 @@ public class Main { private static ArrayList arrExclusions = new ArrayList(); public static void main(String[] args) { + //isFileInNsrl("B61905308B336AD268A782790B661616"); signaturesMD5 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 6000000, 0.00001); //6m signaturesSHA1 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 50000, 0.00001); //50k signaturesSHA256 = 
BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 2000000, 0.00001); //2m @@ -178,6 +181,9 @@ public class Main { System.out.println("\t\tSkipping excluded hash: " + potentialHash); return; } + //if(isFileInNsrl(potentialHash)) { + // return; + //} if (potentialHash.length() == 32) { if (signaturesMD5.put(potentialHash)) { amtSignaturesAddedMD5++; @@ -206,4 +212,51 @@ public class Main { } } } + + //CREATE INDEX hashIndexMD5 ON FILE (md5); CREATE INDEX hashIndexSHA1 ON FILE (sha1); CREATE INDEX hashIndexSHA256 ON FILE (sha256); + //CREATE INDEX hashIndex ON FILE (md5,sha1,sha256); + /*private static String url = "jdbc:sqlite:RDS_2024.03.1_android_minimal.db"; + private static Connection connection = null; + private static Statement statement = null; + private static boolean isFileInNsrl(String hash) { + String hashType = null; + switch(hash.length()) { + case 32: + hashType = "md5"; + break; + case 40: + hashType ="sha1"; + break; + case 64: + hashType ="sha256"; + break; + default: + return false; + } + String sql = "select package_id,md5,sha1,sha256 from FILE where " + hashType + " = '" + hash.toUpperCase() + "';"; + try { + if(connection == null || statement == null) { + SQLiteConfig config = new SQLiteConfig(); + config.setReadOnly(true); + config.setSharedCache(true); + connection = DriverManager.getConnection(url, config.toProperties()); + statement = connection.createStatement(); + } + ResultSet rs = statement.executeQuery(sql); + if(rs.next()) { + System.out.println("\t\tSkipping excluded NSRL hash: "+ rs.getString("md5").toLowerCase()); + System.out.println("\t\tSkipping excluded NSRL hash: " + rs.getString("sha1").toLowerCase()); + System.out.println("\t\tSkipping excluded NSRL hash: " + rs.getString("sha256").toLowerCase()); + ResultSet rsPkg = statement.executeQuery("select name from PKG where package_id = '" + rs.getString("package_id") + "';"); + if (rsPkg.next()) { + System.out.println("\t\t\tFrom: " + rsPkg.getString("name")); + } + return 
true; + } + return false; + } catch (SQLException e) { + e.printStackTrace(); + } + return false; + }*/ }