minHash implementation in PowerShell: scores the similarity of two strings as the Jaccard index of their character shingles.

I could not have done it without the Better Programming article linked in the function's comments.

function minHash($a, $b, $shingleLength=3, [switch]$caseInsensitive=$False) {
  <#
  .SYNOPSIS
  Scores the similarity of two strings as the Jaccard index of their character shingles.

  .DESCRIPTION
  Each input is converted to a string, trimmed, and reduced to the set of its
  distinct overlapping substrings ("shingles") of $shingleLength characters.
  The score is (number of shingles common to both) / (number of distinct
  shingles overall): 1 means identical shingle sets, 0 means nothing in common.

  Note: despite the name, no hashing or min-wise sampling is performed; this
  is the exact Jaccard index that MinHash approximates.

  .PARAMETER a
  First value to compare. $null is treated as an empty string.

  .PARAMETER b
  Second value to compare. $null is treated as an empty string.

  .PARAMETER shingleLength
  Number of characters per shingle (default 3). Must be at least 1.

  .PARAMETER caseInsensitive
  Upper-case both inputs (invariant culture) before shingling.

  .OUTPUTS
  A number between 0 and 1, or $null when neither input is long enough to
  produce a single shingle (the similarity is undefined in that case).
  #>

  # adapted from https://betterprogramming.pub/identify-similarities-between-sentences-in-python-e9f71d454d1d

  # Jaccard index |A n B| / |A u B| of two HashSet[string] instances.
  # Returns $null when both sets are empty.
  function jaccardSimilarity($a, $b) {
    $shared = 0
    foreach($item in $a) {
      if($b.Contains($item)) { $shared++ }
    }
    # |A u B| = |A| + |B| - |A n B|, so no temporary union set is needed.
    $total = $a.Count + $b.Count - $shared
    if($total -eq 0) { return $null }
    return $shared / $total
  }

  # Set of distinct substrings of length $len found in the trimmed input.
  function shingles($s, $len=3, [switch]$caseInsensitive=$False) {
    # courtesy https://stackoverflow.com/a/29127088
    $set = New-Object System.Collections.Generic.HashSet[string]
    # The [string] cast turns $null into '' and lets non-string input through.
    $text = ([string]$s).Trim()
    # Invariant culture keeps results identical regardless of the host locale.
    if($caseInsensitive) { $text = $text.ToUpperInvariant() }

    # Substring yields the shingle text directly; stringifying a char[] slice
    # would insert $OFS between characters.
    for($i = 0; $i -le ($text.Length - $len); $i++) {
      $set.Add($text.Substring($i, $len)) | Out-Null
    }

    # The unary comma stops PowerShell from enumerating the set on output.
    # Without it an empty set reaches the caller as $null, which @() turns
    # into a one-element collection.
    return ,$set
  }

  $len = [int]$shingleLength
  if($len -lt 1) {
    throw "shingleLength must be at least 1 (got '$shingleLength')"
  }

  # Forward the switch value instead of duplicating the call per branch.
  $setA = shingles $a -len $len -caseInsensitive:$caseInsensitive
  $setB = shingles $b -len $len -caseInsensitive:$caseInsensitive
  return jaccardSimilarity $setA $setB
}