UFf.. pekne vidno ktora cast kodu je pisana rano a ktora v noci :D
K-means vs Kohonen
Porovnávali sme výsledky metód zhlukovania k-means a kohonenova sieť. Po porovnaní obidvoch zhlukovacích metód môžeme v niekoľkých vetách zhrnúť:
Pri náhodnej inicializácii k-means môže dôjst k stavu, že niektoré means sú inicializované tak neštastne, že sa nachádzajú ďaleko od vstupných dát. To ma za následok, že v kroku priradenia vstupných bodov k najbližším meansom sa stane, že týmto meansom nebude patriť ani jeden bod a teda sa ich poloha nemá ako zmeniť. Pravdepodobnosť výskytu takýchto vyradených meansov stňupa s celkovým počtom zvolených meansov. Tento nedostatok sa odstraňuje inicializáciou meansov na hodnotu náhodného vstupného bodu.
Čo sa týka rýchlosti, podstatne lepšie výsledky sme dosahovali pri metóde k-means. K-means zbehlo približne 10x rýchlejšie ako kohonen, pričom mu stačilo približne 20 cyklov.
Vizuálne výsledky obidvoch zhlukovacích metód sú veľmi podobné. Môžeme teda tvrdiť, že zhlukovanie do 9 meansov je akýmsi ekvivalentom ku zhlukovaniu pomocou kohonenovej siete veľkej 3x3.
Kohonen
K-means
Zdrojový súbor
#include <graphics.h>
#include <iostream>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <getopt.h>
#include <math.h>
#include <graphics.h>
#include <unistd.h>
#include <windows.h>
#include <dos.h>
#include <stdio.h>
#include <conio.h>
#include "image.h"
#define KMEANS_DEFAULT 25
#define TRAIN_CNT_DEFAULT 500
struct config
{
int train_cnt; /* pocet trenovacich dat */
char *pbm_file; /* pbm subor */
char *image_file; /* bmp,jpg,png subor */
};
struct kmeans
{
struct config *cfg;
double x[KMEANS_DEFAULT];
double y[KMEANS_DEFAULT];
};
void config_initx( struct config *cfg )
{
(cfg->train_cnt) = TRAIN_CNT_DEFAULT;
(cfg->pbm_file) = NULL;
(cfg->image_file) = NULL;
}
int config_parse_cmdline( struct config *cfg, int argc, char **argv )
{
char option;
char *tail;
if ( argc < 2 )
{
printf( "not enough arguments\n" );
return ( -1 );
}
while ( (option = getopt( argc, argv, "t:" ) ) != -1 )
{
switch (option)
{
case 't':
(cfg->train_cnt) = strtol( optarg, &tail, 10 );
printf( "training count: %d\n", (cfg->train_cnt ));
break;
case '?':
printf( "to parse: %c\n", optopt );
break;
default:
printf( "unrecognized option: %c\n", option );
return ( -1 );
}
}
if ( optind < argc )
{
(cfg->image_file) = argv[optind];
printf( "image: %s\n", (cfg->image_file) );
}
else
{
printf( "no image file specified\n" );
return ( -1 );
}
optind++;
if ( optind < argc )
{
(cfg->pbm_file) = argv[optind];
printf( "pbm: %s\n", (cfg->pbm_file) );
}
else
{
printf( "no pbm file specified\n" );
return ( -1 );
}
return ( 0 );
}
/* random weights */
void kmeans_init_weights( struct kmeans *kmeans )
{
int i;
int j;
for ( i=0; i<KMEANS_DEFAULT; i++ )
{
(kmeans->x)[i] = ( rand() % 1000 ) / (float)(1000-1);
(kmeans->y)[i] = ( rand() % 1000 ) / (float)(1000-1);
}
}
/* first k weights from input data */
void kmeans_init_weights_v2( struct kmeans *kmeans, float *vx, float *vy )
{
int i;
int j;
for ( i=0; i<KMEANS_DEFAULT; i++ )
{
(kmeans->x)[i] = vx[i];
(kmeans->y)[i] = vy[i];
}
}
static void train_vectors_draw( int training_cnt, float *vx, float *vy, int *mean, int maxx, int maxy )
{
int i;
for ( i = 0; i < training_cnt; i++ )
{
setcolor( mean[i] % 15 + 1 );
rectangle( (int)(vx[i] * maxx) - 2, (int)(vy[i] * maxy) - 2,
(int)(vx[i] * maxx) + 2, (int)(vy[i] * maxy) + 2 );
setfillstyle(SOLID_FILL, mean[i] % 15 + 1 );
floodfill( (int)(vx[i] * maxx), (int)(vy[i] * maxy), mean[i] % 15 + 1 );
}
return;
}
static int kmeans_off_spiral( struct kmeans *kmeans, struct pbm_image *pbm )
{
int i;
int off_spiral = 0;
for ( i = 0; i < KMEANS_DEFAULT; i++ )
{
if ( ! pbm_image_is_black_point( pbm, (int) ((kmeans->x)[i] * pbm->width), (int) ((kmeans->y)[i] * pbm->height) ) )
{
off_spiral++;
}
}
return ( off_spiral );
}
static void kmeans_redraw( struct kmeans *kmeans, int maxx, int maxy )
{
int i;
# define KMEAN_RADIUS 6
for ( i = 0; i < KMEANS_DEFAULT; i++ )
{
setcolor( 1 );
rectangle( (int)(kmeans->x[i] * maxx) - KMEAN_RADIUS, (int)(kmeans->y[i] * maxy) - KMEAN_RADIUS,
(int)(kmeans->x[i] * maxx) + KMEAN_RADIUS, (int)(kmeans->y[i] * maxy) + KMEAN_RADIUS );
setfillstyle(SOLID_FILL, i % 15 + 1 );
floodfill( (int)(kmeans->x[i] * maxx), (int)(kmeans->y[i] * maxy), 1 );
}
return;
}
static int kmeans_eval_closest( struct kmeans *kmeans, float *trainx, float *trainy, int *train_mean )
{
int i, j;
float distance;
float min_distance;
int best_mean;
int changed = 0;
for ( i = 0; i < TRAIN_CNT_DEFAULT; i++ )
{
min_distance = 1.0f;
for ( j = 0; j < KMEANS_DEFAULT; j++ )
{
distance = powf( trainx[i] - (kmeans->x)[j], 2 ) + powf( trainy[i] - (kmeans->y)[j], 2 );
if ( distance < min_distance )
{
min_distance = distance;
best_mean = j;
}
}
if ( train_mean[i] != best_mean )
{
train_mean[i] = best_mean;
changed = 1;
}
}
return changed;
}
static void kmeans_move_to_new_mean( struct kmeans *kmeans, float *trainx, float *trainy, int *train_mean )
{
int i, j;
float dist_x;
float dist_y;
int n_points;
for ( i = 0; i < KMEANS_DEFAULT; i++ )
{
dist_x = 0.0;
dist_y = 0.0;
n_points = 0;
for ( j = 0; j < TRAIN_CNT_DEFAULT; j++ )
{
if ( train_mean[j] == i )
{
n_points++;
dist_x += trainx[j];
dist_y += trainy[j];
}
}
if ( n_points ) /* can't be 0 */
{
(kmeans->x)[i] = dist_x / (float)n_points;
(kmeans->y)[i] = dist_y / (float)n_points;
}
}
return;
}
static int train_vectors_init( int training_cnt, struct pbm_image *img, float **vectorx, float **vectory, int **vector_mean )
{
int i;
int pwidth;
int pheight;
(*vectorx) = (float *)malloc( sizeof (float) * training_cnt );
if ( !(*vectorx) )
{
return ( -1 );
}
(*vectory) = (float *)malloc( sizeof (float) * training_cnt );
if ( !(*vectory) )
{
return ( -1 );
}
(*vector_mean) = (int *)malloc( sizeof (float) * training_cnt );
for ( i=0; i<training_cnt; i++ )
{
pbm_image_get_rand_black_point( img, &pwidth, &pheight );
(*vectorx)[i] = pwidth / (float)(img->width);
(*vectory)[i] = pheight / (float)(img->height);
(*vector_mean)[i] = -1;
}
return ( 0 );
}
int main( int argc, char **argv )
{
struct kmeans kmeans;
struct pbm_image img;
struct config cfg;
void *bitmap_bground;
int page1, page2;
int i,j;
char *namebuf;
float *trainx;
float *trainy;
int *train_mean; /* which mean does this training point belong to */
/* name used for output images */
namebuf = (char *)malloc( 15 );
/* 1. read configuration */
config_initx( &cfg );
if ( config_parse_cmdline( &cfg, argc, argv ) < 0 )
{
printf( "cmdline parse error\n" );
return ( -1 );
}
pbm_image_initx( &img );
/* 2. read pbm image */
if ( pbm_image_from_file( &img, cfg.pbm_file ) < 0 )
{
return ( -1 );
}
kmeans.cfg = &cfg;
/* seed the random number generator used to pick training data */
srand( time( NULL ) );
train_vectors_init( cfg.train_cnt, &img, &trainx, &trainy, &train_mean );
kmeans_init_weights ( &kmeans );
// kmeans_init_weights_v2( &kmeans, trainx, trainy );
/* measure time */
LARGE_INTEGER frequency; // ticks per second
LARGE_INTEGER t1, t2; // ticks
double elapsedTime;
// get ticks per second
QueryPerformanceFrequency(&frequency);
// start timer
QueryPerformanceCounter(&t1);
/* init window */
initwindow((img.width),(img.height), "window", 500, 000, 1, 1 );
readimagefile( cfg.image_file, 0, 0, (img.width),(img.height) );
bitmap_bground=malloc( imagesize(0,0,(img.width),(img.height)) );
getimage(0,0,(img.width),(img.height), bitmap_bground);
/* first screenshot */
train_vectors_draw( TRAIN_CNT_DEFAULT, trainx, trainy, train_mean, (img.width),(img.height) );
kmeans_redraw( &kmeans, (img.width),(img.height) );
sprintf( namebuf, "image%05d.bmp", 0 );
writeimagefile( namebuf, 0, 0, img.width, img.height );
page1 = getvisualpage();
page2 = getactivepage();
setactivepage( page1 );
setvisualpage( page2 );
Sleep( 1000 );
cleardevice();
putimage(0,0,bitmap_bground,COPY_PUT);
/* main cycle */
int mean_changed = 0;
int cycle = 0;
while ( 1 )
{
cycle++;
mean_changed = kmeans_eval_closest( &kmeans, trainx, trainy, train_mean );
if ( ! mean_changed )
{
break;
}
train_vectors_draw( TRAIN_CNT_DEFAULT, trainx, trainy, train_mean, (img.width),(img.height) );
kmeans_redraw( &kmeans, (img.width),(img.height) );
sprintf( namebuf, "image%05d.bmp", cycle );
writeimagefile( namebuf, 0, 0, img.width, img.height );
kmeans_move_to_new_mean( &kmeans, trainx, trainy, train_mean );
page1 = getvisualpage();
page2 = getactivepage();
setactivepage( page1 );
setvisualpage( page2 );
cleardevice();
putimage(0,0,bitmap_bground,COPY_PUT);
Sleep( 1000 );
}
// stop timer
QueryPerformanceCounter(&t2);
elapsedTime = (t2.QuadPart - t1.QuadPart) * 1000.0 / frequency.QuadPart;
printf ("cycles: %d, time: %.2lf ms off_spiral: %d\n", cycle, elapsedTime, kmeans_off_spiral( &kmeans, &img ) );
train_vectors_draw( TRAIN_CNT_DEFAULT, trainx, trainy, train_mean, (img.width),(img.height) );
kmeans_redraw( &kmeans, (img.width),(img.height) );
setvisualpage(getactivepage());
while(!kbhit());
closegraph();
return ( 0 );
}
Zdrojové súbory: Zdrojové súbory (.zip) | Dokumentácia (docx)
Discussion
-
Michal