00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "som.h"
00022 #include "distances.h"
00023 #include "mathadd.h"
00024 #include <gsl/gsl_rng.h>
00025 #include <gsl/gsl_randist.h>
00026
00040 double** som_generate_connectivity_matrix( SOMConnectivityType type, double **m, int n ){
00041 int i,j;
00042 int sqrtn=0;
00043 int ix,iy;
00044 int jx,jy;
00045 double diff;
00046 if( m==ALLOC_IN_FCT) {
00047 m = dblpp_init( n,n );
00048 }
00049 switch( type ){
00050 case ONED_LINEAR:
00051 for( i=0; i<n; i++ ){
00052 for( j=i; j<n; j++ ){
00053 m[i][j] = ABS( i-j );
00054 m[j][i] = m[i][j];
00055 }
00056 }
00057 break;
00058 case TWOD_GRID:
00059 if( SQR( sqrtn=sqrt((double)n) ) != n ){
00060 warnprintf("for a 2D-grid, n should be the square of an integer\n");
00061 }
00062 for( i=0; i<n; i++ ){
00063 ix=i/sqrtn;
00064 iy=i-sqrtn*ix;
00065 for( j=i; j<n; j++ ){
00066 jx = j/sqrtn;
00067 jy = j-jx*sqrtn;
00068 m[i][j] = sqrt( SQR(ix-jx)+SQR(iy-jy) );
00069 m[j][i] = m[i][j];
00070 }
00071 }
00072 break;
00073 case TWOD_HEXAGONAL:
00074 for( i=0; i<n; i++ ){
00075 ix=i/sqrtn;
00076 iy=i-sqrtn*ix;
00077 for( j=i; j<n; j++ ){
00078 jx = j/sqrtn;
00079 jy = j-jx*sqrtn;
00080 diff = ix-jx;
00081 if( (iy-jy)%2 != 0 ){
00082 if( iy%2 == 0 )
00083 diff -= 0.5;
00084 else
00085 diff += 0.5;
00086 }
00087 m[i][j] = SQR( diff );
00088 diff = iy-jy;
00089 m[i][j] += 0.75*SQR( diff );
00090 m[i][j] = sqrt( m[i][j] );
00091 m[j][i] = m[i][j];
00092 }
00093 }
00094 break;
00095 default:
00096 errprintf( "Do not know type '%i'\n", type );
00097 }
00098 return m;
00099 }
00100
00103 Som* som_init( int dimension, int n, int nruns, SOMConnectivityType connectivity_type ){
00104 Som *s;
00105 int i;
00106 s=(Som*)malloc(sizeof(Som));
00107 s->dimension=dimension;
00108 s->n=n;
00109 s->nruns = nruns;
00110 dprintf("dimension=%i, n=%i\n", s->dimension, s->n );
00111 s->m = (double**) malloc( n*sizeof(double*) );
00112 for( i=0; i<n; i++ ){
00113 s->m[i] = (double*)malloc( dimension*sizeof(double));
00114 }
00115 s->distancefct = vectordist_euclidean;
00116 s->time_decay = som_time_decay_linear;
00117 s->initial_runs = 0.1*nruns;
00118 s->neighbourhoodfct = som_neighbourhood_gaussian;
00119 s->connectivity_type = connectivity_type;
00120 if( s->connectivity_type!=CUSTOM ){
00121 s->connectivity = som_generate_connectivity_matrix( s->connectivity_type,
00122 ALLOC_IN_FCT, s->n );
00123 }
00124 s->distancefct_parameters=NULL;
00125
00126 gsl_rng_env_setup();
00127 s->random_number_type = (gsl_rng_type *)gsl_rng_default;
00128 s->rng = gsl_rng_alloc (s->random_number_type);
00129
00130 return s;
00131 }
00132
00133 void som_free( Som *s ){
00134 int i;
00135 for( i=0; i<s->n; i++ ){
00136 free( s->m[i] );
00137 }
00138 free( s->m );
00139 if( s->connectivity_type==CUSTOM ){
00140 for( i=0; i<s->n; i++ )
00141 free( s->connectivity[i] );
00142 free( s->connectivity ) ;
00143 }
00144 free( s );
00145 }
00146
00147
00151 double som_time_decay_linear( int t, int nruns, int initial_runs ){
00152 double alpha;
00153
00154 if( t<=initial_runs ){
00155 alpha = 0.9*(1-(double)t/(double)(initial_runs));
00156 } else {
00157 alpha = 0.02*(1-(double)t/(double)nruns);
00158 }
00159 return alpha;
00160 }
00161
00162
00166 double som_neighbourhood_gaussian( int x, int bmu, struct som_struct *s, int t ){
00167 double sigma;
00168 double h;
00169
00170
00171 sigma = exp(-(double)t/(double)s->initial_runs)*(s->n/2) + 1;
00172
00173 h = exp(-pow( s->connectivity[x][bmu], 2.0)/(double)(2.0*pow(sigma, 2.0)));
00174
00175 return h;
00176 }
00177
00184 void som_train_from_data( Som *s, double **X, int dim, int nsamples ){
00185 int i, j, t;
00186 int bmu=0;
00187 double h, tmp;
00188 double bmu_score=0;
00189 double *input;
00190 double alpha;
00191
00192 if( dim != s->dimension ){
00193 errprintf( "Data dimension and SOM-dimension do not match... exit\n");
00194 return;
00195 }
00196
00197 if( s->progress ){
00198 (*(s->progress))( PROGRESSBAR_INIT, s->nruns );
00199 }
00200
00201 for( t=1; t<=s->nruns; t++ ){
00202 if( s->progress ){
00203 s->progress( PROGRESSBAR_CONTINUE_LONG, t-1 );
00204 }
00205
00206
00207 input = X[(t-1) % nsamples];
00208
00209
00210 for( i=0; i<s->n; i++ ){
00211 if( (tmp=s->distancefct( input, s->m[i], dim, NULL ))<bmu_score ){
00212 bmu_score = tmp;
00213 bmu = i;
00214 }
00215 }
00216
00217
00218 for( i=0; i<s->n; i++ ){
00219 if( s->progress ){
00220 s->progress( PROGRESSBAR_CONTINUE_SHORT, i );
00221 }
00222
00223 h = s->neighbourhoodfct( i, bmu, s, t );
00224 alpha = s->time_decay( t,s->nruns,s->initial_runs );
00225 for( j=0; j<dim; j++ ){
00226 s->m[i][j] += alpha*h*(input[j]-s->m[i][j]);
00227 }
00228 }
00229 }
00230
00231 }
00232
00233
00237 void som_initialize_random( Som *s, double min, double max ){
00238 int i,j;
00239
00240 for( i=0; i<s->n; i++ ){
00241 for( j=0; j<s->dimension; j++ ){
00242 s->m[i][j] = gsl_ran_flat( s->rng, min, max );
00243 }
00244 }
00245 }
00246
00247
00254 void som_initialize_random_samples( Som *s, double **X, int dim, int nsamples ){
00255 int i;
00256 int ransamp;
00257
00258 if( dim != s->dimension ){
00259 errprintf( "Data dimension and SOM-dimension do not match... exit\n");
00260 return;
00261 }
00262
00263 for( i=0; i<s->n; i++ ){
00264 ransamp = gsl_rng_uniform_int( s->rng, nsamples );
00265 memcpy( s->m[i], X[ransamp], dim*sizeof(double) );
00266 }
00267 }
00268
00269 void som_print( FILE *f, Som *s ){
00270 fprintf( f,
00271 "Som-struct '%p'\n"
00272 " dimension = %i\n"
00273 " n = %i\n"
00274 " nruns = %i\n"
00275 " initial = %i\n", s, s->dimension, s->n, s->nruns,
00276 s->initial_runs );
00277 }