#include #include #include #include
/*
 * NOTE(review): the header names of the four #include directives above are
 * missing from this copy of the file. The code below uses FILE, fscanf and
 * fprintf; malloc, free, exit and drand48; and sqrt and log -- so presumably
 * <stdio.h>, <stdlib.h> and <math.h> are three of them. The fourth cannot be
 * determined from what is visible here. TODO: restore from the original.
 */

/*
 * state_of_nature
 * One class of the generated problem: its prior, the standard deviation and
 * the per-dimension means that gen_values() feeds to gen_gaussian().
 */
typedef struct {
    double prior;      /* prior read from the problem file by read_problem() */
    double sigma;      /* standard deviation passed to gen_gaussian() for every dimension */
    int dimension;     /* number of entries in means */
    double *means;     /* malloc'ed array of `dimension` means; freed at the end of main() */
} state_of_nature;

/*
 * data_point
 * One generated sample, as filled in by gen_data_point().
 */
typedef struct {
    int class;         /* index of the state_of_nature the sample was drawn from */
    double *data;      /* malloc'ed coordinates from gen_values(); the caller frees them */
} data_point;

/*
 * gen_uniform_01()
 * Generate random value in the range [0,1[
 * Thin wrapper around drand48().
 */
double gen_uniform_01(void) {
    return drand48();
}

/*
 * gen_uniform_ab(a,b)
 * Generate random value in the range [a,b[
 * by scaling and shifting one gen_uniform_01() draw.
 */
double gen_uniform_ab(double a, double b) {
    return a + (b-a)*gen_uniform_01();
}

/*
 * gen_gaussian(mean, sigma)
 * Return a random value with the distribution N(mean,sigma)
 * where sigma is the standard deviation
 *
 * Draws points (x, y) with both coordinates in [-1,1[ until one falls
 * strictly inside the unit circle and is not the origin, then transforms x
 * into a standard normal deviate (polar form of the Box-Muller transform).
 * Only the x-derived deviate is used; y contributes only through r2.
 */
double gen_gaussian(double mean, double sigma) {
    double x, y, r2;
    do {
        x = 2.0*gen_uniform_01() - 1.0;
        y = 2.0*gen_uniform_01() - 1.0;
        r2 = x*x + y*y;
    } while (r2 >= 1.0 || r2 == 0.0);   /* r2 == 0 is rejected because of log(r2)/r2 below */
    return mean + sigma * (x * sqrt(-2.0*log(r2)/r2));
}

/*
 * gen_values(state)
 * Allocate an array of state->dimension doubles and fill entry i with a
 * gen_gaussian() draw using state->means[i] and state->sigma.
 * Returns the malloc'ed array; the caller owns it and must free() it.
 * The malloc() result is not checked before it is written to.
 */
double *gen_values(state_of_nature *state) {
    int i;
    double *values = (double *) malloc(sizeof(double) * state->dimension);
    /*
     * NOTE(review): "idimension" is not declared anywhere in the visible
     * code. Presumably the loop condition was "i<state->dimension" and the
     * "<state->" span was lost together with the #include names -- confirm
     * against the original before building.
     */
    for (i=0; idimension; i++) {
        values[i] = gen_gaussian(state->means[i], state->sigma);
    }
    return values;
}

/*
 * gen_data_point(states, dp)
 * Pick one entry of states[] at random according to the priors and draw a
 * sample from it. On return dp->class is the index of the chosen entry and
 * dp->data is a freshly allocated coordinate array from gen_values().
 */
void gen_data_point(state_of_nature states[], data_point *dp) {
    int count = 0;
    state_of_nature *current = states;
    double selector = gen_uniform_01();
    /*
     * Walk the array, subtracting each prior from the uniform draw, until the
     * remainder fits inside the current entry's prior.
     * NOTE(review): the walk has no upper bound and this function is not told
     * how many entries states[] has. If the priors add up to less than the
     * drawn value (rounding, or a problem file whose priors do not sum to 1),
     * `current` advances past the end of the array.
     */
    while (selector > current->prior) {
        selector -= current->prior;
        count++;
        current++;
    }
    dp->class = count;
    dp->data = gen_values(current);
}

/*
 * read_problem(file, num_classes)
 * Read a problem description from `file`. The visible part reads the header
 * "#classes #dims sigma" (the class count is stored through num_classes),
 * allocates one state_of_nature per class, and for each class reads its
 * prior, accumulates it into `total`, copies the shared sigma and dimension,
 * and allocates the means array.
 * A malformed header or prior prints a message to stderr and calls exit(1).
 * Neither malloc() result is checked.
 *
 * NOTE(review): the rest of this function is missing from this copy -- see
 * the note at the damaged line below. What is done with `total`, how the
 * means are read, and the return statement are not visible here.
 */
state_of_nature *read_problem(FILE *file, int *num_classes) {
    int i, j, num_dimensions;
    double sigma;
    double total;
    state_of_nature *results;
    if (3 != fscanf(file, "%d %d %lf", num_classes, &num_dimensions, &sigma)) {
        fprintf(stderr, "Data format error - should be\n#classes #dims sigma\n");
        exit(1);
    }
    results = (state_of_nature *) malloc(*num_classes * sizeof(state_of_nature));
    for (i=0, total=0; i<*num_classes; i++) {
        if (1 != fscanf(file, "%lf", &results[i].prior)) {
            fprintf(stderr, "Data format error - expected prior.\n");
            exit(1);
        }
        total += results[i].prior;
        results[i].sigma = sigma;
        results[i].dimension = num_dimensions;
        results[i].means = (double *) malloc(num_dimensions * sizeof(double));
        /*
         * NOTE(review): source text is missing inside the next statement.
         * Everything between "for (j=0; j" and "dimension, num_samples);" was
         * lost, apparently a span starting at a '<' and ending at the '>' of
         * a "->". The lost text presumably held the rest of this loop, the
         * end of read_problem(), and the start of main() (its declarations of
         * alternatives, dp, num_classes and num_samples, and the beginning of
         * a call whose last two arguments survive). None of that can be
         * recovered from this copy; restore it from the original file.
         * The braces opened above are therefore never closed in the visible
         * text.
         */
        for (j=0; jdimension, num_samples);

    /*
     * Tail of main() (its head is in the lost text above).
     * Generates num_samples samples from `alternatives`; each is written to
     * stdout as the class index followed by every coordinate, separated by
     * single spaces, one sample per line, and its coordinate array is freed
     * right after printing.
     */
    for(i=0; i< num_samples; i++) {
        gen_data_point(alternatives, &dp);
        fprintf(stdout, "%d", dp.class);
        for (j=0; j < alternatives->dimension; j++)
            fprintf(stdout, " %lf", dp.data[j]);
        fprintf(stdout, "\n");
        free(dp.data);
    }
    /* Release each class's means array, then the class array itself. */
    for (i = 0; i < num_classes; i++) {
        free(alternatives[i].means);
    }
    free(alternatives);
    exit(0);
}