DiscIO: Parallelize the re-encryption code

2020-01-25 14:03:11 +01:00 · 2020-01-25 14:03:11 +01:00 · da9e0fb598
parent 319c508978
commit da9e0fb598
1 changed files with 108 additions and 59 deletions
--- a/Source/Core/DiscIO/VolumeWii.cpp
+++ b/Source/Core/DiscIO/VolumeWii.cpp
@ -8,16 +8,19 @@
 #include <array>
 #include <cstddef>
 #include <cstring>
+#include <future>
 #include <map>
 #include <memory>
 #include <optional>
 #include <string>
+#include <thread>
 #include <utility>
 #include <vector>

 #include <mbedtls/aes.h>
 #include <mbedtls/sha1.h>

+#include "Common/Align.h"
 #include "Common/Assert.h"
 #include "Common/CommonTypes.h"
 #include "Common/Logging/Log.h"
@ -530,86 +533,132 @@ bool VolumeWii::EncryptGroup(u64 offset, u64 partition_data_offset,
                             std::array<u8, GROUP_TOTAL_SIZE>* out)
 {
  std::vector<std::array<u8, BLOCK_DATA_SIZE>> unencrypted_data(BLOCKS_PER_GROUP);
-
-  for (size_t i = 0; i < BLOCKS_PER_GROUP; ++i)
-  {
-    if (offset + (i + 1) * BLOCK_DATA_SIZE <= partition_data_decrypted_size)
-    {
-      if (!blob->ReadWiiDecrypted(offset + i * BLOCK_DATA_SIZE, BLOCK_DATA_SIZE,
-                                  unencrypted_data[i].data(), partition_data_offset))
-      {
-        return false;
-      }
-    }
-    else
-    {
-      unencrypted_data[i].fill(0);
-    }
-  }
-
  std::vector<std::array<u8, BLOCK_HEADER_SIZE>> unencrypted_hashes(BLOCKS_PER_GROUP);

-  // H0 hashes
+  std::array<std::future<void>, BLOCKS_PER_GROUP> hash_futures;
+  bool error_occurred = false;
+
  for (size_t i = 0; i < BLOCKS_PER_GROUP; ++i)
  {
-    for (u32 j = 0; j < 31; ++j)
+    if (!error_occurred)
    {
-      mbedtls_sha1_ret(unencrypted_data[i].data() + j * 0x400, 0x400,
-                       unencrypted_hashes[i].data() + j * SHA1_SIZE);
+      if (offset + (i + 1) * BLOCK_DATA_SIZE <= partition_data_decrypted_size)
+      {
+        if (!blob->ReadWiiDecrypted(offset + i * BLOCK_DATA_SIZE, BLOCK_DATA_SIZE,
+                                    unencrypted_data[i].data(), partition_data_offset))
+        {
+          error_occurred = true;
+        }
+      }
+      else
+      {
+        unencrypted_data[i].fill(0);
+      }
    }

-    std::memset(unencrypted_hashes[i].data() + 0x26C, 0, 0x14);
+    hash_futures[i] = std::async(std::launch::async, [&unencrypted_data, &unencrypted_hashes,
+                                                      &hash_futures, error_occurred, i]() {
+      const size_t h1_base = Common::AlignDown(i, 8);
+
+      if (!error_occurred)
+      {
+        // H0 hashes
+        for (size_t j = 0; j < 31; ++j)
+        {
+          mbedtls_sha1_ret(unencrypted_data[i].data() + j * 0x400, 0x400,
+                           unencrypted_hashes[i].data() + j * SHA1_SIZE);
+        }
+
+        // H0 padding
+        std::memset(unencrypted_hashes[i].data() + 0x26C, 0, 0x14);
+
+        // H1 hash
+        mbedtls_sha1_ret(unencrypted_hashes[i].data(), 0x26C,
+                         unencrypted_hashes[h1_base].data() + 0x280 + (i - h1_base) * SHA1_SIZE);
+      }
+
+      if (i % 8 == 7)
+      {
+        for (size_t j = 0; j < 7; ++j)
+          hash_futures[h1_base + j].get();
+
+        if (!error_occurred)
+        {
+          // H1 padding
+          std::memset(unencrypted_hashes[h1_base].data() + 0x320, 0, 0x20);
+
+          // H1 copies
+          for (size_t j = 1; j < 8; ++j)
+          {
+            std::memcpy(unencrypted_hashes[h1_base + j].data() + 0x280,
+                        unencrypted_hashes[h1_base].data() + 0x280, 0xC0);
+          }
+
+          // H2 hash
+          mbedtls_sha1_ret(unencrypted_hashes[h1_base].data() + 0x280, 0xA0,
+                           unencrypted_hashes[0].data() + 0x340 + h1_base / 8 * SHA1_SIZE);
+        }
+
+        if (i == BLOCKS_PER_GROUP - 1)
+        {
+          for (size_t j = 0; j < 7; ++j)
+            hash_futures[j * 8 + 7].get();
+
+          if (!error_occurred)
+          {
+            // H2 padding
+            std::memset(unencrypted_hashes[0].data() + 0x3E0, 0, 0x20);
+
+            // H2 copies
+            for (size_t j = 1; j < BLOCKS_PER_GROUP; ++j)
+            {
+              std::memcpy(unencrypted_hashes[j].data() + 0x340,
+                          unencrypted_hashes[0].data() + 0x340, 0xC0);
+            }
+          }
+        }
+      }
+    });
  }

-  // H1 hashes
-  for (size_t i = 0; i < BLOCKS_PER_GROUP / 8; ++i)
-  {
-    for (u32 j = 0; j < 8; ++j)
-    {
-      mbedtls_sha1_ret(unencrypted_hashes[i * 8 + j].data(), 0x26C,
-                       unencrypted_hashes[i * 8].data() + 0x280 + j * SHA1_SIZE);
-    }
+  // Wait for all the async tasks to finish
+  hash_futures.back().get();

-    std::memset(unencrypted_hashes[i * 8].data() + 0x320, 0, 0x20);
+  if (error_occurred)
+    return false;

-    for (u32 j = 1; j < 8; ++j)
-    {
-      std::memcpy(unencrypted_hashes[i * 8 + j].data() + 0x280,
-                  unencrypted_hashes[i * 8].data() + 0x280, 0xC0);
-    }
-  }
+  const unsigned int threads =
+      std::min(BLOCKS_PER_GROUP, std::max<unsigned int>(1, std::thread::hardware_concurrency()));

-  // H2 hashes
-  {
-    for (u32 i = 0; i < BLOCKS_PER_GROUP / 8; ++i)
-    {
-      mbedtls_sha1_ret(unencrypted_hashes[i * 8].data() + 0x280, 0xA0,
-                       unencrypted_hashes[0].data() + 0x340 + i * SHA1_SIZE);
-    }
-
-    std::memset(unencrypted_hashes[0].data() + 0x3E0, 0, 0x20);
-
-    for (size_t i = 1; i < BLOCKS_PER_GROUP; ++i)
-      std::memcpy(unencrypted_hashes[i].data() + 0x340, unencrypted_hashes[0].data() + 0x340, 0xC0);
-  }
+  std::vector<std::future<void>> encryption_futures(threads);

  mbedtls_aes_context aes_context;
  mbedtls_aes_setkey_enc(&aes_context, key.data(), 128);

-  // Encryption
-  for (size_t i = 0; i < BLOCKS_PER_GROUP; ++i)
+  for (size_t i = 0; i < threads; ++i)
  {
-    u8* out_ptr = out->data() + i * BLOCK_TOTAL_SIZE;
+    encryption_futures[i] = std::async(
+        std::launch::async,
+        [&unencrypted_data, &unencrypted_hashes, &aes_context, &out](size_t start, size_t end) {
+          for (size_t i = start; i < end; ++i)
+          {
+            u8* out_ptr = out->data() + i * BLOCK_TOTAL_SIZE;

-    u8 iv[16] = {};
-    mbedtls_aes_crypt_cbc(&aes_context, MBEDTLS_AES_ENCRYPT, BLOCK_HEADER_SIZE, iv,
-                          unencrypted_hashes[i].data(), out_ptr);
+            u8 iv[16] = {};
+            mbedtls_aes_crypt_cbc(&aes_context, MBEDTLS_AES_ENCRYPT, BLOCK_HEADER_SIZE, iv,
+                                  unencrypted_hashes[i].data(), out_ptr);

-    std::memcpy(iv, out_ptr + 0x3D0, sizeof(iv));
-    mbedtls_aes_crypt_cbc(&aes_context, MBEDTLS_AES_ENCRYPT, BLOCK_DATA_SIZE, iv,
-                          unencrypted_data[i].data(), out_ptr + BLOCK_HEADER_SIZE);
+            std::memcpy(iv, out_ptr + 0x3D0, sizeof(iv));
+            mbedtls_aes_crypt_cbc(&aes_context, MBEDTLS_AES_ENCRYPT, BLOCK_DATA_SIZE, iv,
+                                  unencrypted_data[i].data(), out_ptr + BLOCK_HEADER_SIZE);
+          }
+        },
+        i * BLOCKS_PER_GROUP / threads, (i + 1) * BLOCKS_PER_GROUP / threads);
  }

+  for (std::future<void>& future : encryption_futures)
+    future.get();
+
  return true;
 }