import numpy as np
from numba import jit
from numba import cuda

@cuda.jit
def Threads1DArray(A):
    """Store each thread's global 1D index in the matching element of ``A``.

    The global index is ``threadIdx.x + blockIdx.x * blockDim.x``. Threads
    whose index falls outside ``A`` do nothing, so the kernel can safely be
    launched with more threads than ``A`` has elements; elements that no
    thread maps to are left unchanged.

    Args:
        A: 1D array, modified in place.
    """
    tx = cuda.threadIdx.x  # thread index within its block
    bx = cuda.blockIdx.x   # block index within the grid
    gx = cuda.blockDim.x   # number of threads per block

    # Global position of this thread in the grid (same value as cuda.grid(1)).
    pos = tx + bx * gx

    # The launch configuration need not match the array length exactly;
    # without this guard the surplus threads would write out of bounds.
    if pos < A.shape[0]:
        A[pos] = pos

@cuda.jit
def Threads2DArray(A):
    """Store ``row + column`` of each thread's global 2D position in ``A``.

    The x dimension of the launch configuration maps to columns and the y
    dimension maps to rows. Threads whose position falls outside ``A`` do
    nothing, so the grid may be larger than the array; elements that no
    thread maps to are left unchanged.

    Args:
        A: 2D array indexed as ``A[row, column]``, modified in place.
    """
    tx = cuda.threadIdx.x  # thread index within its block, x axis
    ty = cuda.threadIdx.y  # thread index within its block, y axis
    bx = cuda.blockIdx.x   # block index within the grid, x axis
    by = cuda.blockIdx.y   # block index within the grid, y axis
    gx = cuda.blockDim.x   # threads per block along x
    gy = cuda.blockDim.y   # threads per block along y

    # Global position of this thread in the grid.
    col = tx + bx * gx
    lin = ty + by * gy

    # Skip threads that land outside the array so that a grid larger than
    # the matrix cannot write out of bounds.
    if lin < A.shape[0] and col < A.shape[1]:
        # Tuple indexing avoids creating an intermediate row view.
        A[lin, col] = col + lin
    
    

# Example A: 1D array processed by a single block of N threads.
N = 100
blocksPerGrid = 1
ThreadsPerBlock = N

A = np.zeros(N)
print(A)  # contents before the kernel launch (all zeros)

Threads1DArray[blocksPerGrid, ThreadsPerBlock](A)

print(A)  # contents after the kernel launch
# # Example B. 1D array: blocksPerGrid=5 blocks, each with ThreadsPerBlock=N/blocksPerGrid threads
# blocksPerGrid=5
# ThreadsPerBlock=int(N/blocksPerGrid)
# Threads1DArray[blocksPerGrid,ThreadsPerBlock](A)

# # Example C. 2D array: divide the matrix into two parts vertically
# N = 6
# ThreadsPerBlock=(3,6)
# blocksPerGrid=(2,1)
# B = np.zeros((N,N))
# Threads2DArray[blocksPerGrid,ThreadsPerBlock](B)

# # Example D. 2D array: divide the matrix into 4 equal parts
# N = 6
# ThreadsPerBlock=(3,3)
# blocksPerGrid=(2,2)
# B = np.zeros((N,N))
# Threads2DArray[blocksPerGrid,ThreadsPerBlock](B)

# # Example E. 2D array: divide the matrix into 6 equal parts
# N = 6
# ThreadsPerBlock=(3,2)
# blocksPerGrid=(2,3)
# B = np.zeros((N,N))
# Threads2DArray[blocksPerGrid,ThreadsPerBlock](B)

# # Example F. 2D array: divide the matrix into 3 equal parts horizontally
# N = 6
# ThreadsPerBlock=(6,2)
# blocksPerGrid=(1,3)
# B = np.zeros((N,N))
# Threads2DArray[blocksPerGrid,ThreadsPerBlock](B)


