From a545ae6b638e95b7b768c13fcee92306f7130184 Mon Sep 17 00:00:00 2001 From: Tad Date: Thu, 28 Dec 2023 18:14:28 -0500 Subject: [PATCH] Parser updates Signed-off-by: Tad --- scripts/0avast-covid19.sh | 5 +++ scripts/0clamav.sh | 38 +---------------- scripts/0eset.sh | 2 +- scripts/0genbloom.sh | 10 +++++ scripts/0sign.sh | 14 ------- scripts/0stalkerware.sh | 4 +- scripts/0targetedthreats.sh | 5 +-- scripts/0threatfox.sh | 5 +++ scripts/Main.java | 82 ++++++++++++++++++++++++++----------- 9 files changed, 82 insertions(+), 83 deletions(-) create mode 100644 scripts/0avast-covid19.sh create mode 100644 scripts/0genbloom.sh delete mode 100644 scripts/0sign.sh create mode 100644 scripts/0threatfox.sh diff --git a/scripts/0avast-covid19.sh b/scripts/0avast-covid19.sh new file mode 100644 index 0000000..9ca11fe --- /dev/null +++ b/scripts/0avast-covid19.sh @@ -0,0 +1,5 @@ +#!/bin/sh +#License: CC0 +#Description: Hypatia conversion script for https://github.com/avast/covid-19-ioc + +tail -n +2 */*.csv | sed 's/,/ , /' | awk '{ print $1 }' | sort -u >> avast-covid19.sha256 diff --git a/scripts/0clamav.sh b/scripts/0clamav.sh index df87c9d..15a7433 100644 --- a/scripts/0clamav.sh +++ b/scripts/0clamav.sh @@ -1,48 +1,12 @@ #!/bin/sh -#License: GPL-3.0 +#License: CC0 #Description: Hypatia conversion script for ClamAV databases (GPL-2.0) #sudo -i freshclam origDir="$PWD" mkdir /tmp/mss -mkdir /tmp/mss/optimized -mkdir /tmp/mss/processed cd /tmp/mss cp /var/lib/clamav/main.c*d . cp /var/lib/clamav/daily.c*d . sigtool -u main.c*d sigtool -u daily.c*d - -#MD5 -grep "Andr\\." main.hdb >> Android.hdb -grep "Andr\\." daily.hdb >> Android.hdb -#grep "Java\\." main.hdb >> Android.hdb -#grep "Java\\." daily.hdb >> Android.hdb -grep "Unix\\." main.hdb >> Android.hdb -grep "Unix\\." daily.hdb >> Android.hdb -grep "Multios\\." main.hdb >> Android.hdb -grep "Multios\\." daily.hdb >> Android.hdb - -#SHA -grep "Andr\\." main.hsb >> Android.hsb -grep "Andr\\." 
daily.hsb >> Android.hsb -#grep "Java\\." main.hsb >> Android.hsb -#grep "Java\\." daily.hsb >> Android.hsb -grep "Unix\\." main.hsb >> Android.hsb -grep "Unix\\." daily.hsb >> Android.hsb -grep "Multios\\." main.hsb >> Android.hsb -grep "Multios\\." daily.hsb >> Android.hsb - -databases=("Android.hdb" "Android.hsb" "main.hdb" "main.hsb" "daily.hdb" "daily.hsb"); -for db in "${databases[@]}" -do - #remove unnecessary bits to reduce file size and app memory usage - python "$origDir"/optimize.py "$db" >> optimized/"$db"; - #sort to increase compression efficiency - sort -k3 -t ":" --parallel=$(nproc) --output processed/"$db" optimized/"$db"; -done; - -gzip -k /tmp/mss/*.hdb -gzip -k /tmp/mss/*.hsb -gzip -k /tmp/mss/processed/*.hdb -gzip -k /tmp/mss/processed/*.hsb diff --git a/scripts/0eset.sh b/scripts/0eset.sh index bd82e44..fd8ab1c 100644 --- a/scripts/0eset.sh +++ b/scripts/0eset.sh @@ -1,5 +1,5 @@ #!/bin/bash -#License: GPLv3 +#License: CC0 #Description: Hypatia conversion script for https://github.com/eset/malware-ioc (BSD-2-Clause) processHashes() { diff --git a/scripts/0genbloom.sh b/scripts/0genbloom.sh new file mode 100644 index 0000000..d13aade --- /dev/null +++ b/scripts/0genbloom.sh @@ -0,0 +1,10 @@ +#!/bin/sh +#License: CC0 + +rm production/index.html +cp template.html production/index.html +echo "<pre>" >> production/index.html
+sed -i "s/\[DATE\]/$(date -u)/" production/index.html
+java -jar HypatiaDatabaseConverter-0.1__.jar raw/ >> production/index.html
+echo "</pre>" >> production/index.html +mv raw/hypatia-*-bloom.bin production/ diff --git a/scripts/0sign.sh b/scripts/0sign.sh deleted file mode 100644 index 95db9f9..0000000 --- a/scripts/0sign.sh +++ /dev/null @@ -1,14 +0,0 @@ -for database in *.bin -do - if [ -f "$database.sig" ]; then - #If it does exist sign if it doesn't match - if ! gpg --verify "$database.sig"; then - rm "$database.sig"; - gpg --sign --local-user 6395FC9911EDCD6158712DF7BADFCABDDBF5B694 --detach-sign "$database"; - fi; - else - #Sign it if it doesn't exist - gpg --sign --local-user 6395FC9911EDCD6158712DF7BADFCABDDBF5B694 --detach-sign "$database"; - fi; - -done diff --git a/scripts/0stalkerware.sh b/scripts/0stalkerware.sh index 28298b3..c546afc 100644 --- a/scripts/0stalkerware.sh +++ b/scripts/0stalkerware.sh @@ -1,5 +1,5 @@ #!/bin/bash -#License: GPLv3 +#License: CC0 #Description: Hypatia conversion script for https://github.com/AssoEchap/stalkerware-indicators (CC BY 4.0) while IFS=, read -r col1SHA col2Package col3Certificate col4Version col5Name @@ -10,5 +10,3 @@ do done < samples.csv; sed -i '1d' stalkerware.hsb; - -gzip stalkerware.hsb; diff --git a/scripts/0targetedthreats.sh b/scripts/0targetedthreats.sh index 6a5915d..93f07a3 100644 --- a/scripts/0targetedthreats.sh +++ b/scripts/0targetedthreats.sh @@ -1,5 +1,5 @@ #!/bin/bash -#License: GPLv3 +#License: CC0 #Description: Hypatia conversion script for https://github.com/botherder/targetedthreats (CC BY-SA 4.0) while IFS=, read -r col1MD5 col2SHA256 col3Family col4Actor col5Country col6Report @@ -30,6 +30,3 @@ done < samples.csv; sed -i '1d' targetedthreats.hdb; sed -i '1d' targetedthreats.hsb; - -gzip targetedthreats.hdb; -gzip targetedthreats.hsb; diff --git a/scripts/0threatfox.sh b/scripts/0threatfox.sh new file mode 100644 index 0000000..2bf8133 --- /dev/null +++ b/scripts/0threatfox.sh @@ -0,0 +1,5 @@ +#!/bin/sh +#License: CC0 +#Description: Hypatia conversion script for https://threatfox.abuse.ch/export/csv/sha256/full/ (CC0) + +tail
-n +10 full_sha256.csv | awk '{ print $4 } ' | sed 's/^"//' | sed 's/",$//' > threatfox.sha256 diff --git a/scripts/Main.java b/scripts/Main.java index 76f81ba..9784e75 100644 --- a/scripts/Main.java +++ b/scripts/Main.java @@ -26,39 +26,61 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.InputStreamReader; +import java.util.*; +import java.util.Arrays; import java.util.zip.GZIPInputStream; import java.util.regex.Matcher; import java.util.regex.Pattern; public class Main { - public static BloomFilter signaturesMD5 = null; - public static BloomFilter signaturesSHA1 = null; - public static BloomFilter signaturesSHA256 = null; + private static BloomFilter signaturesMD5 = null; + private static BloomFilter signaturesSHA1 = null; + private static BloomFilter signaturesSHA256 = null; - public static int amtLinesValid = 0; - public static int amtLinesInvalid = 0; + private static int amtLinesValid = 0; + private static int amtLinesInvalid = 0; + private static int amtSignaturesReadMD5 = 0; + private static int amtSignaturesReadSHA1 = 0; + private static int amtSignaturesReadSHA256 = 0; - public static int amtSignaturesReadMD5 = 0; - public static int amtSignaturesReadSHA1 = 0; - public static int amtSignaturesReadSHA256 = 0; + private static int amtSignaturesAddedMD5 = 0; + private static int amtSignaturesAddedSHA1 = 0; + private static int amtSignaturesAddedSHA256 = 0; - public static int amtSignaturesAddedMD5 = 0; - public static int amtSignaturesAddedSHA1 = 0; - public static int amtSignaturesAddedSHA256 = 0; + private static int amtPreviousSignaturesMD5 = 0; + private static int amtPreviousSignaturesSHA1 = 0; + private static int amtPreviousSignaturesSHA256 = 0; - public static int amtPreviousSignaturesMD5 = 0; - public static int amtPreviousSignaturesSHA1 = 0; - public static int amtPreviousSignaturesSHA256 = 0; + private static ArrayList arrExclusions = new ArrayList(); public static void main(String[] 
args) { - signaturesMD5 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 5800000, 0.00001); //5.8m - signaturesSHA1 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 10000, 0.00001); //10k - signaturesSHA256 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 800000, 0.00001); //800k + signaturesMD5 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 6000000, 0.00001); //6m + signaturesSHA1 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 50000, 0.00001); //50k + signaturesSHA256 = BloomFilter.create(Funnels.stringFunnel(Charsets.US_ASCII), 2000000, 0.00001); //2m + + try { + File exclusionDatabase = new File(args[0] + "../excluded.hashes"); + if(exclusionDatabase.exists()) { + Scanner s = new Scanner(exclusionDatabase); + while(s.hasNextLine()) { + String line = s.nextLine().trim(); + if(!line.startsWith("#") && (line.length() == 32 || line.length() == 40 || line.length() == 64)) { + arrExclusions.add(line); + } + } + s.close(); + System.out.println("Loaded " + arrExclusions.size() + " excluded hashes"); + } + } catch (Exception e) { + e.printStackTrace(); + } System.out.println("Processing:"); - for (File databaseLocation : new File(args[0]).listFiles()) { + File[] databases = new File(args[0]).listFiles(); + Arrays.sort(databases); + for (File databaseLocation : databases) { System.out.println("\t" + databaseLocation); amtPreviousSignaturesMD5 = amtSignaturesAddedMD5; amtPreviousSignaturesSHA1 = amtSignaturesAddedSHA1; @@ -71,20 +93,28 @@ public class Main { reader = new BufferedReader(new FileReader(databaseLocation)); } String line; - if (databaseLocation.getName().contains(".hdb") //.hdb format: md5, size, name - || databaseLocation.getName().contains(".hsb")) {//.hsb format: sha256, size, name + if (databaseLocation.getName().endsWith(".hdb") //.hdb/.hsb format: hash:size:name:version + || databaseLocation.getName().endsWith(".hsb")) { while ((line = reader.readLine()) != null) { if (line.length() > 0 
&& line.contains(":")) { String[] lineS = line.trim().toLowerCase().split(":"); - addChecked(lineS[0]); + addChecked(lineS[0].trim()); } } - } else if (databaseLocation.getName().contains(".md5") - || databaseLocation.getName().contains(".sha1") - || databaseLocation.getName().contains(".sha256")) {//one signature per line + } else if (databaseLocation.getName().endsWith(".md5") + || databaseLocation.getName().endsWith(".sha1") + || databaseLocation.getName().endsWith(".sha256") + || databaseLocation.getName().endsWith(".hashes")) {//one signature per line while ((line = reader.readLine()) != null) { addChecked(line.trim().toLowerCase()); } + } else if (databaseLocation.getName().endsWith(".loki")) {//.loki format: hash;comment + while ((line = reader.readLine()) != null) { + if (line.length() > 0 && line.contains(";")) { + String[] lineS = line.trim().toLowerCase().split(";"); + addChecked(lineS[0].trim()); + } + } } reader.close(); System.out.println("\t\tmd5: " + (amtSignaturesAddedMD5 - amtPreviousSignaturesMD5) + ", sha1: " + (amtSignaturesAddedSHA1 - amtPreviousSignaturesSHA1) + ", sha256: " + (amtSignaturesAddedSHA256 - amtPreviousSignaturesSHA256)); @@ -126,6 +156,10 @@ public class Main { private static void addChecked(String potentialHash) { if (!potentialHash.startsWith("#") && potentialHash.length() >= 4) { if (isHexadecimal(potentialHash)) { + if(arrExclusions.contains(potentialHash)) { + System.out.println("\t\tSkipping excluded hash: " + potentialHash); + return; + } if (potentialHash.length() == 32) { if (signaturesMD5.put(potentialHash)) { amtSignaturesAddedMD5++;