MPI Bitonic Sort

Exported from Notepad++

#include "bitonic_sorter.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctime>
#include <mpi.h>
#include <string.h>


void generateDataSet(int dataSet[],int size) {

    printf( "[0] creating dataset ...\n");
    srand ((unsigned int) time(NULL) );
    for ( int index = 0; index < size; ++index) {
        dataSet[index] = index;
    }
    

}

void randomizeData(int dataSet[],int tempDataSet[],int size){
    printf( "[0] dataset of size %d being randomized ...\n",size);
    for ( int index = 0; index < size; ++index) {
        tempDataSet[index] = rand();
    }

    SelectionSort(tempDataSet,dataSet, size);

}



void SelectionSort(int a[],int b[], int size)
{
     int i;
     for (i = 0; i < size - 1; ++i)
     {
          int j, min, tempA,tempB;
          min = i;
          for (j = i+1; j < size; ++j)
          {
               if (a[j] < a[min])
                    min = j;
          }
          tempB = b[i];
          tempA = a[i];

          a[i] = a[min];
          b[i] = b[min];

          a[min] = tempA;
          b[min] = tempB;
     }
}

void masterHandshake(char buff[],int numprocs,int BUFSIZE,int TAG,MPI_Status &stat){
     for(int i=1;i<numprocs;i++)
         {
           sprintf(buff, "hey %d! ", i);
           MPI_Send(buff, BUFSIZE, MPI_CHAR, i, TAG, MPI_COMM_WORLD);
         }
         for(int i=1;i<numprocs;i++)
         {
           MPI_Recv(buff, BUFSIZE, MPI_CHAR, i, TAG, MPI_COMM_WORLD, &stat);
           printf("[%d]: %s\n", i, buff);
         }
}


void slaveHandshake(char buff[],int numprocs,int BUFSIZE,int TAG,MPI_Status &stat,int myid){
    // receive from rank 0
    char idstr[32];
     MPI_Recv(buff, BUFSIZE, MPI_CHAR, 0, TAG, MPI_COMM_WORLD, &stat);
     sprintf(idstr, "Processor %d ", myid);
     strncat(buff, idstr, BUFSIZE-1);
     strncat(buff, "reporting for duty", BUFSIZE-1);
     // send to rank 0
     MPI_Send(buff, BUFSIZE, MPI_CHAR, 0, TAG, MPI_COMM_WORLD);
}

void distributeIntArray(int numprocs,int dataSet[],int SIZE){

    for(int dest = 1; dest <= numprocs; dest++){
        printf("sending data to processor %ld, size = %ld\n",dest,SIZE/(numprocs-1));
             
                 //MPI_Send(dataSet+(dest/2), SIZE / (numprocs-1), MPI_INT, 1, dest, MPI_COMM_WORLD);//11
                 MPI_Send(dataSet, SIZE, MPI_INT, dest, 1, MPI_COMM_WORLD);
                 
             
         }
    printf("sending data to p");
                 MPI_Finalize();
}
void sendIntArray(int numprocs,int dataSet[],int SIZE,int target){

    for(int dest = 1; dest <= numprocs; dest++){
        printf("sending data to processor %ld, size = %ld\n",dest,SIZE/(numprocs-1));
             
                 //MPI_Send(dataSet+(dest/2), SIZE / (numprocs-1), MPI_INT, 1, dest, MPI_COMM_WORLD);//11
                 MPI_Send(dataSet, SIZE, MPI_INT, dest, 1, MPI_COMM_WORLD);
                 
             
         }
    printf("sending data to p");
                 MPI_Finalize();
}

void recieveIntArray(int buf[],int len,MPI_Status *stat,int from){
    printf("check  \n");
    MPI_Recv(buf,  len, MPI_INT, from, 1,MPI_COMM_WORLD, stat); 
    
    printf("check1  %d\n",buf[63]);
    
}

void bitonicSort(int start, int len,int data[]) {
  if (len>1) {
    int split=len/2;
    bitonicSort(start, split,data);
    bitonicSort(start+split, split,data);
    merge(start, len,data);
  }
}

void merge(int start, int len,int data[]) {
  if (len>1) {
    int mid=len/2;
    int x;
    for (x=start; x<start+mid; x++)
    compareAndSwap(data,x, x+mid);
    merge(start, mid,data);
    merge(start+mid, start ,data);
  }
}
                        
void compareAndSwap(int data[],int i, int j) {
    int temp;
      if (data[i]>data[j]) 
        temp = data[i];
        data[i] = data[j];
        data[j] = temp;
}

bitonic_sorter.h

bitonic_sorter.cpp

Exported from Notepad++

#include "bitonic_sorter.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctime>
#include <mpi.h>
#include <string.h>


void generateDataSet(int dataSet[],int size) {

    printf( "[0] creating dataset ...\n");
    srand ((unsigned int) time(NULL) );
    for ( int index = 0; index < size; ++index) {
        dataSet[index] = index;
    }
    

}

void randomizeData(int dataSet[],int tempDataSet[],int size){
    printf( "[0] dataset of size %d being randomized ...\n",size);
    for ( int index = 0; index < size; ++index) {
        tempDataSet[index] = rand();
    }

    SelectionSort(tempDataSet,dataSet, size);

}



void SelectionSort(int a[],int b[], int size)
{
     int i;
     for (i = 0; i < size - 1; ++i)
     {
          int j, min, tempA,tempB;
          min = i;
          for (j = i+1; j < size; ++j)
          {
               if (a[j] < a[min])
                    min = j;
          }
          tempB = b[i];
          tempA = a[i];

          a[i] = a[min];
          b[i] = b[min];

          a[min] = tempA;
          b[min] = tempB;
     }
}

void masterHandshake(char buff[],int numprocs,int BUFSIZE,int TAG,MPI_Status &stat){
     for(int i=1;i<numprocs;i++)
         {
           sprintf(buff, "hey %d! ", i);
           MPI_Send(buff, BUFSIZE, MPI_CHAR, i, TAG, MPI_COMM_WORLD);
         }
         for(int i=1;i<numprocs;i++)
         {
           MPI_Recv(buff, BUFSIZE, MPI_CHAR, i, TAG, MPI_COMM_WORLD, &stat);
           printf("[%d]: %s\n", i, buff);
         }
}


void slaveHandshake(char buff[],int numprocs,int BUFSIZE,int TAG,MPI_Status &stat,int myid){
    // receive from rank 0
    char idstr[32];
     MPI_Recv(buff, BUFSIZE, MPI_CHAR, 0, TAG, MPI_COMM_WORLD, &stat);
     sprintf(idstr, "Processor %d ", myid);
     strncat(buff, idstr, BUFSIZE-1);
     strncat(buff, "reporting for duty", BUFSIZE-1);
     // send to rank 0
     MPI_Send(buff, BUFSIZE, MPI_CHAR, 0, TAG, MPI_COMM_WORLD);
}

void distributeIntArray(int numprocs,int dataSet[],int SIZE){

    for(int dest = 1; dest <= numprocs; dest++){
        printf("sending data to processor %ld, size = %ld\n",dest,SIZE/(numprocs-1));
             
                 MPI_Send(dataSet, SIZE, MPI_INT, dest, 1, MPI_COMM_WORLD);
                 
             
         }
    printf("sending data to p");
                 MPI_Finalize();
}
void sendIntArray(int numprocs,int dataSet[],int SIZE,int target){

    for(int dest = 1; dest <= numprocs; dest++){
        printf("sending data to processor %ld, size = %ld\n",dest,SIZE/(numprocs-1));
             
                 MPI_Send(dataSet, SIZE, MPI_INT, dest, 1, MPI_COMM_WORLD);
                 
             
         }
    printf("sending data to p");
                 MPI_Finalize();
}

void recieveIntArray(int buf[],int len,MPI_Status *stat,int from){
    printf("check  \n");
    MPI_Recv(buf,  len, MPI_INT, from, 1,MPI_COMM_WORLD, stat); 
    
    printf("check1  %d\n",buf[63]);
    
}

void bitonicSort(int start, int len,int data[]) {
  if (len>1) {
    int split=len/2;
    bitonicSort(start, split,data);
    bitonicSort(start+split, split,data);
    merge(start, len,data);
  }
}

void merge(int start, int len,int data[]) {
  if (len>1) {
    int mid=len/2;
    int x;
    for (x=start; x<start+mid; x++)
    compareAndSwap(data,x, x+mid);
    merge(start, mid,data);
    merge(start+mid, start ,data);
  }
}
                        
void compareAndSwap(int data[],int i, int j) {
    int temp;
      if (data[i]>data[j]) 
        temp = data[i];
        data[i] = data[j];
        data[j] = temp;
}

Example Using 8 Slave Processes

Code Explanation

Firstly we need to look at what the code must accomplish. Since this is most of a demonstration of MPI than trying to get the absolute fastest sort we will only be using MPI for parrallelization. And although MPI supports it we will not use shared memory since openMP does that better. It is optimized to run on a cluster or supercomputer.

The solution: First we start by dividing the dataset into n parts, where n is the number of slave processes available. Then those parts are sent to their respective process. Then in unison each slave processor the portion of data it recieved from the master. Now that we have n blocks of sorted data we must merge them to create a single contiguous result. The data will be sorted globally in log₂n steps. These steps will follow the standard bitonic sort pattern. The first (n²-n)/2 steps are to create a bitonic sequence, and the rest are to sort it. If you follow the above picture or the below pseudocode you can see each process makes a number of swaps with different "partners".

In the above image each arrow represents a comparehigh or comparelow call. In these methods the data from the partners is sent to each other and then each one calls a merge funtion, but only merges half of the data. The way in which the low and high function versions differentiate is in the merge order. Low starts merging from the low end and high starts from the high end. They both finish when they have merged half of the combined number of elements in each list. The pseudocode for this is a follows:

once that all completes the data from all processes is sent back to the master and is in order. Due to the way the data is sorted this is an unstable sorting algorithm

This method is designed to be run on a cluster computer system. If it were to be run on a standard computer it would suffer because there would be several redundent operations. Mainly you would have duplicated data at every bitonic step. Additionally unless you spawn the same number of processes as you have available threads waiting on processes to complete will create a bottleneck.

Bitonic Sort: MPI Implementation

Contents

MPI Bitonic Sort Code

main.cpp

bitonic_sorter.h

bitonic_sorter.cpp

Example Using 8 Slave Processes

Code Explanation

Sources