If you're looking for something even faster, you can also use Numba with CUDA if you have an NVIDIA GPU. Feel free to optimize as needed. The example below uses 2**24 (~16 million numbers) and takes about 239 ms on an Nvidia 3070.
from numba import cudaimport numpy as np@cuda.jitdef findprimes(primes, sqrt_limit): index = cuda.grid(1) if index >= primes.size or index < 2: return if index <= sqrt_limit: if primes[index]: for multiple in range(index*index, primes.size, index): primes[multiple] = Falsedef fast_sieve_gpu(m): primes = np.ones(m, dtype=bool) primes[0] = primes[1] = False sqrt_limit = int(np.sqrt(m)) d_primes = cuda.to_device(primes) threads_per_block = 128 blocks_per_grid = (m + threads_per_block - 1) // threads_per_block findprimes[blocks_per_grid, threads_per_block](d_primes, sqrt_limit) primes = d_primes.copy_to_host() prime_numbers = np.nonzero(primes)[0] return prime_numbersm = 2**24 # 16777216prime_numbers = fast_sieve_gpu(m)print(prime_numbers)%timeit fast_sieve_gpu(m)
# Output (this is for 2**24, which is 16x larger than 2**20):
# [ 2 3 5 ... 16777183 16777199 16777213]
# 239 ms ± 2.66 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)