Assignment 2 GPU Course Lab

A simple CUDA program, followed by a discussion of each of its components.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// writeKernel: fills the array behind d_array so that element i holds value + i.
//   d_array - destination array (written by the kernel, never read)
//   value   - base value added to every element's index
//   size    - number of elements to write
// Uses only the x dimension of the launch; any thread whose global index
// falls at or beyond size exits without writing.
__global__ void writeKernel(int *d_array, int value, int size) {
    const int gid = blockIdx.x * blockDim.x + threadIdx.x;
    if (gid >= size) {
        return; // surplus thread: nothing to write
    }
    d_array[gid] = value + gid; // distinct value per element
}

// Reports a failed CUDA runtime call on stderr and terminates the program.
//   status - return code of the call being checked
//   what   - short description of the operation, used in the message
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what,
                cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Entry point: has the GPU fill an array with (random base + index),
// copies the array back to the host and prints it.
// Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
// CUDA call fails.
int main() {
    const int size = 10;                             // Number of elements
    const size_t bytes = (size_t)size * sizeof(int); // Total memory size in bytes
    srand((unsigned int)time(NULL));

    int *h_array = NULL; // Host array
    int *d_array = NULL; // Device array

    // Allocate memory on the host
    h_array = (int*)malloc(bytes);
    if (h_array == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        return EXIT_FAILURE;
    }

    // Allocate memory on the device (GPU)
    checkCuda(cudaMalloc((void**)&d_array, bytes), "cudaMalloc");

    // Launch with a fixed block size and enough blocks to cover every
    // element, so the configuration stays valid if size is increased.
    // The kernel's bounds check makes the surplus threads no-ops.
    const int threadsPerBlock = 256;
    const int blocks = (size + threadsPerBlock - 1) / threadsPerBlock;
    const int base = rand() % 100 + 1; // Random offset in [1, 100]
    writeKernel<<<blocks, threadsPerBlock>>>(d_array, base, size);
    // A launch returns no status itself; configuration errors are
    // reported through cudaGetLastError().
    checkCuda(cudaGetLastError(), "writeKernel launch");

    // Copy data back from device to host. The return code is checked so a
    // failed copy is never followed by printing uninitialised host memory.
    checkCuda(cudaMemcpy(h_array, d_array, bytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy (device to host)");

    // Print the results
    printf("Values read from GPU:\n");
    for (int i = 0; i < size; i++) {
        printf("%d ", h_array[i]);
    }
    printf("\n");

    // Free allocated memory
    checkCuda(cudaFree(d_array), "cudaFree");
    free(h_array);

    return 0;
}

The program writes numbers with a random offset into GPU memory, then reads them back to the host and prints them.