Parallel transfers

2025-11-07 18:14:43 +01:00
parent 5a64558bb9
commit 7c2bb65a86
4 changed files with 302 additions and 68 deletions

.gitignore

@@ -4,3 +4,6 @@ khdb.txt.zip
ElysiumSettings.txt
/Reports
/khdb-shards
khdb-manifest.json
/elysium/.vscode
/.vscode


@@ -60,6 +60,9 @@ param(
[switch]$ShowProgress,
[int]$ProgressUpdateInterval = 100000,
[ValidateRange(1, 64)]
[int]$MaxParallelTransfers = 5,
[switch]$ForcePlainText,
[string]$CheckpointPath,
@@ -124,6 +127,12 @@ function Get-SettingsValue {
return $null
}
function Get-FunctionDefinitionText {
param([Parameter(Mandatory = $true)][string]$Name)
$cmd = Get-Command -Name $Name -CommandType Function -ErrorAction Stop
return $cmd.ScriptBlock.Ast.Extent.Text
}
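# Note: for a function retrieved with Get-Command, ScriptBlock.Ast is the
# FunctionDefinitionAst, so Extent.Text returns the complete
# 'function Name { ... }' source text, which can be re-declared verbatim in
# another runspace via Invoke-Expression.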
function Merge-ShardsToFile {
param(
[psobject]$Manifest,
@@ -851,6 +860,44 @@ if ($resolvedSettingsPath) {
}
}
$psSupportsParallel = ($PSVersionTable.PSVersion.Major -ge 7)
$effectiveParallelTransfers = if ($MaxParallelTransfers -lt 1) { 1 } else { [int]$MaxParallelTransfers }
$parallelTransfersEnabled = $psSupportsParallel -and $effectiveParallelTransfers -gt 1
if (-not $psSupportsParallel -and $effectiveParallelTransfers -gt 1) {
Write-Verbose "Parallel transfers requested but PowerShell $($PSVersionTable.PSVersion) does not support ForEach-Object -Parallel; using serial mode."
}
$parallelAzureUploadHelpers = $null
$parallelAzureUploadHelperList = @()
$parallelS3UploadHelpers = $null
$parallelS3UploadHelperList = @()
if ($parallelTransfersEnabled) {
$parallelAzureUploadHelpers = @{
'Build-BlobUri' = Get-FunctionDefinitionText 'Build-BlobUri'
'Upload-AzureBlob' = Get-FunctionDefinitionText 'Upload-AzureBlob'
}
$parallelAzureUploadHelperList = $parallelAzureUploadHelpers.GetEnumerator() | ForEach-Object {
[pscustomobject]@{ Name = $_.Key; Definition = $_.Value }
}
$parallelS3UploadHelpers = @{}
@(
'Get-Bytes',
'Get-HashHex',
'HmacSha256',
'ToHex',
'GetSignatureKey',
'UriEncode',
'BuildCanonicalPath',
'BuildAuthHeaders',
'BuildS3Uri',
'Invoke-S3HttpUpload'
) | ForEach-Object {
$parallelS3UploadHelpers[$_] = Get-FunctionDefinitionText $_
}
$parallelS3UploadHelperList = $parallelS3UploadHelpers.GetEnumerator() | ForEach-Object {
[pscustomobject]@{ Name = $_.Key; Definition = $_.Value }
}
}
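# A minimal, standalone sketch of the rehydration pattern above (the helper
# name here is illustrative): ForEach-Object -Parallel runspaces do not inherit
# the caller's functions, so each worker re-declares any missing helper from
# its captured definition text before first use.
#
#   function Get-Greeting { param([string]$Who) "Hello, $Who" }
#   $helperList = @([pscustomobject]@{
#       Name       = 'Get-Greeting'
#       Definition = (Get-Command Get-Greeting -CommandType Function).ScriptBlock.Ast.Extent.Text
#   })
#   1..3 | ForEach-Object -Parallel {
#       foreach ($helper in $using:helperList) {
#           if (-not (Get-Command $helper.Name -ErrorAction SilentlyContinue)) {
#               Invoke-Expression $helper.Definition
#           }
#       }
#       Get-Greeting -Who "worker $_"
#   } -ThrottleLimit 2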
# Apply defaults from settings when caller did not specify overrides
if ($elysiumSettings) {
if (-not $PSBoundParameters.ContainsKey('StorageProvider')) {
@@ -1170,12 +1217,36 @@ switch ($StorageProvider.ToUpperInvariant()) {
if ([string]::IsNullOrWhiteSpace($ContainerName)) { throw 'containerName is required for Azure uploads.' }
if ([string]::IsNullOrWhiteSpace($SasToken)) { throw 'sasToken is required for Azure uploads.' }
Write-Host "Uploading shards to Azure Blob Storage container '$ContainerName'..."
foreach ($entry in $manifestShards) {
$localPath = Join-Path -Path $localShardRoot -ChildPath $entry.name
$remoteKey = Combine-StoragePath -Prefix $normalizedShardPrefix -Name $entry.name
Write-Host (" -> {0}" -f $remoteKey)
Upload-AzureBlob -Account $StorageAccountName -Container $ContainerName -Sas $SasToken -BlobName $remoteKey -FilePath $localPath -ContentType 'text/plain'
if ($parallelTransfersEnabled) {
Write-Host ("Uploading shards to Azure Blob Storage container '{0}' with up to {1} concurrent transfer(s)..." -f $ContainerName, $effectiveParallelTransfers)
$prefixForParallelUpload = if ([string]::IsNullOrWhiteSpace($normalizedShardPrefix)) { $null } else { $normalizedShardPrefix.Replace('\', '/').Trim('/') }
$manifestShards | ForEach-Object -Parallel {
$entry = $PSItem
try {
foreach ($helper in $using:parallelAzureUploadHelperList) {
if (-not (Get-Command $helper.Name -ErrorAction SilentlyContinue)) {
Invoke-Expression $helper.Definition
}
}
$localPath = Join-Path -Path $using:localShardRoot -ChildPath $entry.name
$remoteKey = $entry.name.Replace('\', '/').TrimStart('/')
if (-not [string]::IsNullOrWhiteSpace($using:prefixForParallelUpload)) {
$remoteKey = $using:prefixForParallelUpload + '/' + $remoteKey
}
Upload-AzureBlob -Account $using:StorageAccountName -Container $using:ContainerName -Sas $using:SasToken -BlobName $remoteKey -FilePath $localPath -ContentType 'text/plain'
Write-Host (" -> {0}" -f $remoteKey)
} catch {
throw ("Shard '{0}': {1}" -f $entry.name, $_.Exception.Message)
}
} -ThrottleLimit $effectiveParallelTransfers
} else {
Write-Host "Uploading shards to Azure Blob Storage container '$ContainerName'..."
foreach ($entry in $manifestShards) {
$localPath = Join-Path -Path $localShardRoot -ChildPath $entry.name
$remoteKey = Combine-StoragePath -Prefix $normalizedShardPrefix -Name $entry.name
Write-Host (" -> {0}" -f $remoteKey)
Upload-AzureBlob -Account $StorageAccountName -Container $ContainerName -Sas $SasToken -BlobName $remoteKey -FilePath $localPath -ContentType 'text/plain'
}
}
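# Illustration of the $using: scoping rule relied on in the parallel branch
# above: caller-scope variables are not visible inside a -Parallel script block
# unless read through $using:, and they arrive as snapshots, not live
# references (values here are illustrative).
#
#   $prefix = 'khdb/shards'
#   'aa.txt', 'ab.txt' | ForEach-Object -Parallel {
#       $remoteKey = $using:prefix + '/' + $_
#       Write-Host (" -> {0}" -f $remoteKey)
#   } -ThrottleLimit 2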
Write-Host ("Uploading manifest to {0}" -f $normalizedManifestRemote)
@@ -1188,12 +1259,36 @@ switch ($StorageProvider.ToUpperInvariant()) {
throw 's3AccessKeyId and s3SecretAccessKey are required for S3 uploads.'
}
Write-Host "Uploading shards to S3 bucket '$S3BucketName'..."
foreach ($entry in $manifestShards) {
$localPath = Join-Path -Path $localShardRoot -ChildPath $entry.name
$remoteKey = Combine-StoragePath -Prefix $normalizedShardPrefix -Name $entry.name
Write-Host (" -> {0}" -f $remoteKey)
Invoke-S3HttpUpload -EndpointUrl $S3EndpointUrl -Bucket $S3BucketName -Key $remoteKey -FilePath $localPath -Region $S3Region -AccessKeyId $S3AccessKeyId -SecretAccessKey $S3SecretAccessKey -ForcePathStyle $S3ForcePathStyle -PayloadHash $entry.sha256 -ContentType 'text/plain'
if ($parallelTransfersEnabled) {
Write-Host ("Uploading shards to S3 bucket '{0}' with up to {1} concurrent transfer(s)..." -f $S3BucketName, $effectiveParallelTransfers)
$prefixForParallelUpload = if ([string]::IsNullOrWhiteSpace($normalizedShardPrefix)) { $null } else { $normalizedShardPrefix.Replace('\', '/').Trim('/') }
$manifestShards | ForEach-Object -Parallel {
$entry = $PSItem
try {
foreach ($helper in $using:parallelS3UploadHelperList) {
if (-not (Get-Command $helper.Name -ErrorAction SilentlyContinue)) {
Invoke-Expression $helper.Definition
}
}
$localPath = Join-Path -Path $using:localShardRoot -ChildPath $entry.name
$remoteKey = $entry.name.Replace('\', '/').TrimStart('/')
if (-not [string]::IsNullOrWhiteSpace($using:prefixForParallelUpload)) {
$remoteKey = $using:prefixForParallelUpload + '/' + $remoteKey
}
Invoke-S3HttpUpload -EndpointUrl $using:S3EndpointUrl -Bucket $using:S3BucketName -Key $remoteKey -FilePath $localPath -Region $using:S3Region -AccessKeyId $using:S3AccessKeyId -SecretAccessKey $using:S3SecretAccessKey -ForcePathStyle $using:S3ForcePathStyle -PayloadHash $entry.sha256 -ContentType 'text/plain'
Write-Host (" -> {0}" -f $remoteKey)
} catch {
throw ("Shard '{0}': {1}" -f $entry.name, $_.Exception.Message)
}
} -ThrottleLimit $effectiveParallelTransfers
} else {
Write-Host "Uploading shards to S3 bucket '$S3BucketName'..."
foreach ($entry in $manifestShards) {
$localPath = Join-Path -Path $localShardRoot -ChildPath $entry.name
$remoteKey = Combine-StoragePath -Prefix $normalizedShardPrefix -Name $entry.name
Write-Host (" -> {0}" -f $remoteKey)
Invoke-S3HttpUpload -EndpointUrl $S3EndpointUrl -Bucket $S3BucketName -Key $remoteKey -FilePath $localPath -Region $S3Region -AccessKeyId $S3AccessKeyId -SecretAccessKey $S3SecretAccessKey -ForcePathStyle $S3ForcePathStyle -PayloadHash $entry.sha256 -ContentType 'text/plain'
}
}
Write-Host ("Uploading manifest to {0}" -f $normalizedManifestRemote)


@@ -22,7 +22,7 @@ During first run, the tool will ask for passphrase that will be used to encrypt/
After installation, edit ElysiumSettings.txt, check all variables and add domains to test.
### Update Known-Hashes Database (KHDB)
Run script Elysium.ps1 as an administrator and choose option 1 (Update Known-Hashes Database).
The updater now pulls a manifest plus individual hash shards (two-hex prefix layout) from the configured storage (Azure Blob or S3-compatible), verifies checksums, replaces only changed shards, and rebuilds `khdb.txt` for local use. Deleted shards listed in the manifest are removed automatically.
The updater now pulls a manifest plus individual hash shards (two-hex prefix layout) from the configured storage (Azure Blob or S3-compatible), verifies checksums, replaces only changed shards, and rebuilds `khdb.txt` for local use. Deleted shards listed in the manifest are removed automatically. When PowerShell 7 is available, the downloader automatically fetches up to `-MaxParallelTransfers` shards in parallel (default `5`); on Windows PowerShell 5.1 it reverts to the original sequential behavior. Override the concurrency as needed when running the script directly (for example `.\Update-KHDB.ps1 -MaxParallelTransfers 8`).
To publish an updated shard set, run `Prepare-KHDBStorage.ps1` against your sorted `khdb.txt` (or point it at the directory/list of the Have I Been Pwned `.gz` slices). The helper reconstructs the full 32-hex NTLM values (prefix + remainder), deduplicates per hash (keeping the largest count), splits by the first two hex characters, writes a manifest (`version`, `sha256`, `size`, entry counts), and can upload the resulting files directly to Azure Blob Storage (via SAS) or S3-compatible endpoints using SigV4. Invalid or malformed entries are omitted automatically, and a short report (aggregate counts + `invalid-hashes.txt`) is produced for review. Example:
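For orientation, a manifest of the shape described above might look like the example below; the `version`, `name`, `sha256`, and `size` fields are named in this README, while the exact layout and the entry-count field name are illustrative assumptions, not the authoritative schema:

```json
{
  "version": "2025-11-07",
  "shards": [
    { "name": "aa.txt", "sha256": "<lowercase hex digest>", "size": 1048576, "entries": 32768 }
  ]
}
```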
@@ -38,7 +38,7 @@ To publish an updated shard set, run `Prepare-KHDBStorage.ps1` against your sort
-ShardRemotePrefix khdb/shards
```
Use `-SkipUpload` to stage files locally, or `-StorageProvider Azure` with `storageAccountName`/`containerName`/`sasToken` when targeting Azure Blob Storage. Add `-ShowProgress` (optionally tune `-ProgressUpdateInterval`) if you want a running `Write-Progress` indicator while the hashes are being split. Pass `-ForcePlainText` when your `khdb.txt` already contains complete hashes and you want `.gz` references treated as invalid instead of being expanded. When you only need to push an already prepared package, combine `-UploadOnly` with `-OutputRoot` pointing at the existing shard directory and choose the storage provider to perform an upload-only run. Missing storage values are pulled from `ElysiumSettings.txt` automatically (override the path with `-SettingsPath`) so you don't have to retype S3/Azure credentials for every run.
Use `-SkipUpload` to stage files locally, or `-StorageProvider Azure` with `storageAccountName`/`containerName`/`sasToken` when targeting Azure Blob Storage. Add `-ShowProgress` (optionally tune `-ProgressUpdateInterval`) if you want a running `Write-Progress` indicator while the hashes are being split. Pass `-ForcePlainText` when your `khdb.txt` already contains complete hashes and you want `.gz` references treated as invalid instead of being expanded. When you only need to push an already prepared package, combine `-UploadOnly` with `-OutputRoot` pointing at the existing shard directory and choose the storage provider to perform an upload-only run. Missing storage values are pulled from `ElysiumSettings.txt` automatically (override the path with `-SettingsPath`) so you don't have to retype S3/Azure credentials for every run. On PowerShell 7, `Prepare-KHDBStorage.ps1` can push shards concurrently by setting `-MaxParallelTransfers` (default `5`); Windows PowerShell 5.1 automatically falls back to serial uploads.
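As a concrete illustration, an upload-only publish with parallel transfers could look like this (paths and values are placeholders):

```powershell
.\Prepare-KHDBStorage.ps1 `
    -UploadOnly `
    -OutputRoot .\khdb-shards `
    -StorageProvider S3 `
    -ShardRemotePrefix khdb/shards `
    -MaxParallelTransfers 8
```

On Windows PowerShell 5.1 the same invocation still works; the script emits a verbose notice and falls back to serial uploads.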
Every run also emits a cleaned, DSInternals-friendly `khdb-clean.txt` beside the shards so you can inspect or distribute the merged list before publishing.


@@ -103,6 +103,12 @@ function Ensure-AWSS3Module {
throw "AWS Tools for PowerShell not found. Install with: Install-Module AWS.Tools.S3 -Scope CurrentUser"
}
function Get-FunctionDefinitionText {
param([Parameter(Mandatory = $true)][string]$Name)
$cmd = Get-Command -Name $Name -CommandType Function -ErrorAction Stop
return $cmd.ScriptBlock.Ast.Extent.Text
}
function New-S3Client {
param(
[string]$EndpointUrl,
@@ -502,12 +508,57 @@ function Remove-EmptyDirectories {
}
function Update-KHDB {
param(
[ValidateRange(1, 64)]
[int]$MaxParallelTransfers = 5
)
Start-UpdateTranscript -BasePath $scriptRoot
try {
$settings = Read-ElysiumSettings
$installPath = Get-InstallationPath $settings
Ensure-Directory $installPath
$psSupportsParallel = ($PSVersionTable.PSVersion.Major -ge 7)
$effectiveParallelTransfers = if ($MaxParallelTransfers -lt 1) { 1 } else { [int]$MaxParallelTransfers }
$parallelDownloadsEnabled = $psSupportsParallel -and $effectiveParallelTransfers -gt 1
if (-not $psSupportsParallel -and $effectiveParallelTransfers -gt 1) {
Write-Verbose "Parallel transfers requested but PowerShell $($PSVersionTable.PSVersion) does not support ForEach-Object -Parallel; using serial downloads."
}
$parallelAzureDownloadHelpers = $null
$parallelAzureDownloadHelperList = @()
$parallelS3DownloadHelpers = $null
$parallelS3DownloadHelperList = @()
if ($parallelDownloadsEnabled) {
$parallelAzureDownloadHelpers = @{
'Build-BlobUri' = Get-FunctionDefinitionText 'Build-BlobUri'
'Invoke-DownloadWithRetry' = Get-FunctionDefinitionText 'Invoke-DownloadWithRetry'
'New-HttpClient' = Get-FunctionDefinitionText 'New-HttpClient'
'Get-FileSha256Lower' = Get-FunctionDefinitionText 'Get-FileSha256Lower'
}
$parallelAzureDownloadHelperList = $parallelAzureDownloadHelpers.GetEnumerator() | ForEach-Object {
[pscustomobject]@{ Name = $_.Key; Definition = $_.Value }
}
$parallelS3DownloadHelpers = @{}
@(
'Get-Bytes',
'Get-HashHex',
'HmacSha256',
'ToHex',
'GetSignatureKey',
'UriEncode',
'BuildCanonicalPath',
'BuildAuthHeaders',
'BuildS3Uri',
'Invoke-S3HttpDownloadWithRetry',
'Get-FileSha256Lower'
) | ForEach-Object {
$parallelS3DownloadHelpers[$_] = Get-FunctionDefinitionText $_
}
$parallelS3DownloadHelperList = $parallelS3DownloadHelpers.GetEnumerator() | ForEach-Object {
[pscustomobject]@{ Name = $_.Key; Definition = $_.Value }
}
}
$storageProvider = $settings['StorageProvider']
if ([string]::IsNullOrWhiteSpace($storageProvider)) { $storageProvider = 'Azure' }
@@ -531,6 +582,17 @@ function Update-KHDB {
Write-Host "Fetching manifest ($manifestBlobPath) from $storageProvider storage..."
$s3Bucket = $null
$s3EndpointUrl = $null
$s3Region = $null
$s3AK = $null
$s3SK = $null
$forcePathStyle = $true
$s3UseAwsTools = $false
$storageAccountName = $null
$containerName = $null
$sasToken = $null
if ($storageProvider -ieq 'S3') {
$s3Bucket = $settings['s3BucketName']
$s3EndpointUrl = $settings['s3EndpointUrl']
@@ -545,6 +607,10 @@ function Update-KHDB {
if ([string]::IsNullOrWhiteSpace($s3EndpointUrl)) { throw 's3EndpointUrl is required for S3-compatible storage.' }
$forcePathStyle = Get-BooleanSetting -Value $s3Force -Default $true
try { $s3UseAwsTools = [System.Convert]::ToBoolean($s3UseAwsTools) } catch { $s3UseAwsTools = $false }
if ($parallelDownloadsEnabled -and $s3UseAwsTools) {
Write-Warning 'Parallel shard downloads require the SigV4 HTTP path; disabling AWS Tools mode for this run.'
$s3UseAwsTools = $false
}
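# The parallel download path only rehydrates the plain SigV4 HTTP helper
# functions inside each runspace; the AWS Tools module and its client object
# are never loaded there, hence the forced fallback above.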
$downloadKey = Combine-StoragePath -Prefix $null -Name $manifestBlobPath
if ($s3UseAwsTools) {
@@ -603,7 +669,7 @@ function Update-KHDB {
}
}
$downloadQueue = New-Object System.Collections.Generic.List[psobject]
$downloadQueue = [System.Collections.ArrayList]::new()
$remoteNameSet = New-Object 'System.Collections.Generic.HashSet[string]' ([System.StringComparer]::OrdinalIgnoreCase)
foreach ($entry in $manifest.shards) {
$name = [string]$entry.name
@@ -633,10 +699,14 @@ function Update-KHDB {
}
}
if ($needsDownload) {
$downloadQueue.Add($entry)
}
if ($needsDownload) {
[void]$downloadQueue.Add([pscustomobject]@{
Name = $name
Sha256 = $expectedHash
Size = $expectedSize
})
}
}
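# ArrayList.Add returns the index of the inserted element; the [void] cast
# above discards it so the index does not leak into the pipeline output.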
if ($downloadQueue.Count -gt 0) {
Write-Host ("{0} shard(s) require download or refresh." -f $downloadQueue.Count)
@@ -660,74 +730,140 @@ function Update-KHDB {
$useAwsTools = $settings['s3UseAwsTools']
try { $useAwsTools = [System.Convert]::ToBoolean($useAwsTools) } catch { $useAwsTools = $false }
if ($downloadQueue.Count -gt 0) {
if ($downloadQueue.Count -gt 0 -and -not $parallelDownloadsEnabled) {
if ($useAwsTools) {
$storageClient = New-S3Client -EndpointUrl $s3EndpointUrl -Region $s3Region -AccessKeyId $s3AK -SecretAccessKey $s3SK -ForcePathStyle:$forcePathStyle
} else {
$storageHttpClient = @{
Endpoint = $s3EndpointUrl
Bucket = $s3Bucket
Region = $s3Region
AccessKey = $s3AK
SecretKey = $s3SK
ForcePath = $forcePathStyle
}
}
$storageHttpClient = @{
Endpoint = $s3EndpointUrl
Bucket = $s3Bucket
Region = $s3Region
AccessKey = $s3AK
SecretKey = $s3SK
ForcePath = $forcePathStyle
}
}
} else {
if ($downloadQueue.Count -gt 0) {
if ($downloadQueue.Count -gt 0 -and -not $parallelDownloadsEnabled) {
$storageHttpClient = New-HttpClient
}
}
$downloadIndex = 0
foreach ($entry in $downloadQueue) {
$downloadIndex++
$name = [string]$entry.name
$expectedHash = ([string]$entry.sha256).ToLowerInvariant()
$expectedSize = 0L
[void][long]::TryParse([string]$entry.size, [ref]$expectedSize)
if ($parallelDownloadsEnabled -and $downloadQueue.Count -gt 0) {
Write-Host ("Downloading shards with up to {0} concurrent transfer(s)..." -f $effectiveParallelTransfers)
$remotePrefixForParallel = if ([string]::IsNullOrWhiteSpace($remoteShardPrefix)) { $null } else { $remoteShardPrefix.Replace('\', '/').Trim('/') }
$parallelDownloadHelpers = if ($isS3) { $parallelS3DownloadHelperList } else { $parallelAzureDownloadHelperList }
$downloadQueue.ToArray() | ForEach-Object -Parallel {
$entry = $PSItem
try {
if ($null -eq $entry) { return }
foreach ($helper in $using:parallelDownloadHelpers) {
if (-not (Get-Command $helper.Name -ErrorAction SilentlyContinue)) {
Invoke-Expression $helper.Definition
}
}
$name = [string]$entry.Name
if ([string]::IsNullOrWhiteSpace($name)) {
throw "Parallel shard entry missing name: $(ConvertTo-Json $entry -Compress)"
}
$expectedHash = ([string]$entry.Sha256).ToLowerInvariant()
$expectedSize = [long]$entry.Size
$activity = "Downloading shard $downloadIndex/$($downloadQueue.Count): $name"
$remoteKey = Combine-StoragePath -Prefix $remoteShardPrefix -Name $name
$stagingPath = Join-Path -Path $downloadTempRoot -ChildPath $name
Ensure-Directory (Split-Path -Path $stagingPath -Parent)
$remoteKey = $name.Replace('\', '/').TrimStart('/')
if (-not [string]::IsNullOrWhiteSpace($using:remotePrefixForParallel)) {
$remoteKey = $using:remotePrefixForParallel + '/' + $remoteKey
}
$stagingPath = Join-Path -Path $using:downloadTempRoot -ChildPath $name
$stagingParent = Split-Path -Path $stagingPath -Parent
if ($stagingParent -and -not (Test-Path -LiteralPath $stagingParent)) {
[System.IO.Directory]::CreateDirectory($stagingParent) | Out-Null
}
if ($isS3) {
if ($storageClient) {
try {
$request = New-Object Amazon.S3.Model.GetObjectRequest -Property @{ BucketName = $settings['s3BucketName']; Key = $remoteKey }
$response = $storageClient.GetObject($request)
try { $response.WriteResponseStreamToFile($stagingPath, $true) } finally { $response.Dispose() }
} catch {
Write-Warning "AWS Tools download failed for shard '$name': $($_.Exception.Message). Falling back to SigV4 HTTP."
$activity = ("Downloading shard: {0}" -f $name)
if ($using:isS3) {
Invoke-S3HttpDownloadWithRetry -EndpointUrl $using:s3EndpointUrl -Bucket $using:s3Bucket -Key $remoteKey -TargetPath $stagingPath -Region $using:s3Region -AccessKeyId $using:s3AK -SecretAccessKey $using:s3SK -ForcePathStyle:$using:forcePathStyle -Activity $activity
} else {
$client = $null
try {
$client = New-HttpClient
$blobUri = Build-BlobUri -Account $using:storageAccountName -Container $using:containerName -Sas $using:sasToken -BlobName $remoteKey
Invoke-DownloadWithRetry -Client $client -Uri $blobUri -TargetPath $stagingPath -Activity $activity
} finally {
if ($client) { $client.Dispose() }
}
}
$downloadInfo = Get-Item -LiteralPath $stagingPath
if ($downloadInfo.Length -ne $expectedSize) {
throw "Shard '$name' size mismatch. Expected $expectedSize bytes, got $($downloadInfo.Length)."
}
$actualHash = Get-FileSha256Lower -Path $stagingPath
if ($actualHash -ne $expectedHash) {
throw "Shard '$name' checksum mismatch. Expected $expectedHash, got $actualHash."
}
$finalPath = Join-Path -Path $using:localShardRoot -ChildPath $name
$parentDir = Split-Path -Path $finalPath -Parent
if ($parentDir -and -not (Test-Path -LiteralPath $parentDir)) {
[System.IO.Directory]::CreateDirectory($parentDir) | Out-Null
}
Move-Item -LiteralPath $stagingPath -Destination $finalPath -Force
Write-Host ("Shard '{0}' updated." -f $name)
} catch {
throw ("Shard '{0}': {1}" -f $entry.name, $_.Exception.Message)
}
} -ThrottleLimit $effectiveParallelTransfers
} else {
$downloadIndex = 0
foreach ($entry in $downloadQueue.ToArray()) {
$downloadIndex++
if ($null -eq $entry) { continue }
$name = [string]$entry.Name
if ([string]::IsNullOrWhiteSpace($name)) {
throw "Shard entry missing name: $(ConvertTo-Json $entry -Compress)"
}
$expectedHash = ([string]$entry.Sha256).ToLowerInvariant()
$expectedSize = [long]$entry.Size
$activity = "Downloading shard $downloadIndex/$($downloadQueue.Count): $name"
$remoteKey = Combine-StoragePath -Prefix $remoteShardPrefix -Name $name
$stagingPath = Join-Path -Path $downloadTempRoot -ChildPath $name
Ensure-Directory (Split-Path -Path $stagingPath -Parent)
if ($isS3) {
if ($storageClient) {
try {
$request = New-Object Amazon.S3.Model.GetObjectRequest -Property @{ BucketName = $s3Bucket; Key = $remoteKey }
$response = $storageClient.GetObject($request)
try { $response.WriteResponseStreamToFile($stagingPath, $true) } finally { $response.Dispose() }
} catch {
Write-Warning "AWS Tools download failed for shard '$name': $($_.Exception.Message). Falling back to SigV4 HTTP."
Invoke-S3HttpDownloadWithRetry -EndpointUrl $storageHttpClient.Endpoint -Bucket $storageHttpClient.Bucket -Key $remoteKey -TargetPath $stagingPath -Region $storageHttpClient.Region -AccessKeyId $storageHttpClient.AccessKey -SecretAccessKey $storageHttpClient.SecretKey -ForcePathStyle:$storageHttpClient.ForcePath -Activity $activity
}
} else {
Invoke-S3HttpDownloadWithRetry -EndpointUrl $storageHttpClient.Endpoint -Bucket $storageHttpClient.Bucket -Key $remoteKey -TargetPath $stagingPath -Region $storageHttpClient.Region -AccessKeyId $storageHttpClient.AccessKey -SecretAccessKey $storageHttpClient.SecretKey -ForcePathStyle:$storageHttpClient.ForcePath -Activity $activity
}
} else {
Invoke-S3HttpDownloadWithRetry -EndpointUrl $storageHttpClient.Endpoint -Bucket $storageHttpClient.Bucket -Key $remoteKey -TargetPath $stagingPath -Region $storageHttpClient.Region -AccessKeyId $storageHttpClient.AccessKey -SecretAccessKey $storageHttpClient.SecretKey -ForcePathStyle:$storageHttpClient.ForcePath -Activity $activity
$blobUri = Build-BlobUri -Account $storageAccountName -Container $containerName -Sas $sasToken -BlobName $remoteKey
Invoke-DownloadWithRetry -Client $storageHttpClient -Uri $blobUri -TargetPath $stagingPath -Activity $activity
}
} else {
$storageAccountName = $settings['storageAccountName']
$containerName = $settings['containerName']
$sasToken = $settings['sasToken']
$blobUri = Build-BlobUri -Account $storageAccountName -Container $containerName -Sas $sasToken -BlobName $remoteKey
Invoke-DownloadWithRetry -Client $storageHttpClient -Uri $blobUri -TargetPath $stagingPath -Activity $activity
}
$downloadInfo = Get-Item -LiteralPath $stagingPath
if ($downloadInfo.Length -ne $expectedSize) {
throw "Shard '$name' size mismatch. Expected $expectedSize bytes, got $($downloadInfo.Length)."
}
$downloadInfo = Get-Item -LiteralPath $stagingPath
if ($downloadInfo.Length -ne $expectedSize) {
throw "Shard '$name' size mismatch. Expected $expectedSize bytes, got $($downloadInfo.Length)."
}
$actualHash = Get-FileSha256Lower -Path $stagingPath
if ($actualHash -ne $expectedHash) {
throw "Shard '$name' checksum mismatch. Expected $expectedHash, got $actualHash."
}
$actualHash = Get-FileSha256Lower -Path $stagingPath
if ($actualHash -ne $expectedHash) {
throw "Shard '$name' checksum mismatch. Expected $expectedHash, got $actualHash."
}
$finalPath = Join-Path -Path $localShardRoot -ChildPath $name
Ensure-Directory (Split-Path -Path $finalPath -Parent)
Move-Item -LiteralPath $stagingPath -Destination $finalPath -Force
Write-Host ("Shard '{0}' updated." -f $name)
$finalPath = Join-Path -Path $localShardRoot -ChildPath $name
Ensure-Directory (Split-Path -Path $finalPath -Parent)
Move-Item -LiteralPath $stagingPath -Destination $finalPath -Force
Write-Host ("Shard '{0}' updated." -f $name)
}
}
} finally {
if ($storageClient) { $storageClient.Dispose() }