00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "gapstat.h"
00022 #include "linalg.h"
00023 #include "clustering.h"
00024 #include <time.h>
00025 #include "eeg.h"
00026
00027
00028 #ifdef LIBEEGTOOLS_FIXME
00029
00035 GapStatistic* gapstat_init( GapStatistic *g, int K, int B ){
00036 int i;
00037
00038 if( g==ALLOC_IN_FCT ){
00039 g = (GapStatistic*)malloc( sizeof(GapStatistic) );
00040 }
00041 g->K = K;
00042 g->B = B;
00043 g->khat = -1;
00044 dprintf("allocating memory\n");
00045 g->gapdistr = (double*)malloc( K*sizeof(double) );
00046 g->sk = (double*)malloc( K*sizeof(double) );
00047 g->Wk= (double*)malloc( K*sizeof(double) );
00048 g->Wkref= (double**) malloc( B*sizeof(double*) );
00049 for( i=0; i<B; i++ )
00050 g->Wkref[i] = (double*) malloc( K*sizeof(double) );
00051 g->progress=NULL;
00052
00053 return g;
00054 }
00055
00056 void gapstat_free( GapStatistic *g ){
00057 int i;
00058 free( g->Wk );
00059 for( i=0; i<g->B; i++ )
00060 free( g->Wkref[i] );
00061 free( g->Wkref );
00062 free( g->gapdistr );
00063 free( g->sk );
00064 free( g );
00065 }
00066
00074 void gapstat_calculate( GapStatistic *gap, double **X, int n, int p,
00075 VectorDistanceFunction distfunction, const double** D ){
00076 int i, k, b;
00077 Clusters *C;
00078 double **Xr;
00079 double **Dr;
00080 double l_bar;
00081
00082
00083 Xr = (double**) malloc( n*sizeof( double* ) );
00084 Dr = (double**) malloc( n*sizeof( double* ) );
00085 for( i=0; i<n; i++ ){
00086 Xr[i] = (double*) malloc( p*sizeof( double ) );
00087 Dr[i] = (double*) malloc( n*sizeof( double ) );
00088 }
00089
00090 if( gap->progress ){
00091 gap->progress( PROGRESSBAR_INIT, (gap->B+1)*gap->K );
00092 }
00093
00094
00095 for( k=1; k<=gap->K; k++ ){
00096 if( gap->progress )
00097 gap->progress( PROGRESSBAR_CONTINUE_LONG, k );
00098 C = kmedoids_repeat( D, n, k, 50 );
00099
00100 gap->Wk[k-1] = get_within_scatter( D, n, C );
00101 cluster_free( C );
00102 }
00103
00104 for( b=0; b<gap->B; b++ ){
00105 for( k=1; k<=gap->K; k++ ){
00106 if( gap->progress ){
00107 gap->progress( PROGRESSBAR_CONTINUE_LONG, (b+1)*gap->K+k );
00108 }
00109
00110 Xr = gap_get_reference_distribution_simple( (const double **) X, n, p, Xr );
00111 Dr = vectordist_distmatrix( distfunction, (const double**)Xr, n, p, Dr, NULL, NULL );
00112 C = kmedoids_repeat( (const double**)Dr, n, k, 50 );
00113
00114 gap->Wkref[b][k-1] = get_within_scatter( (const double**)Dr, n, C );
00115 cluster_free( C );
00116 }
00117 }
00118
00119 for( k=1; k<=gap->K; k++ ){
00120 gap->gapdistr[k-1]=0;
00121 for( b=0; b<gap->B; b++ ){
00122 gap->gapdistr[k-1] += log( gap->Wkref[b][k-1] );
00123 }
00124 l_bar = ((gap->gapdistr[k-1])/(double)gap->B);
00125 gap->gapdistr[k-1] = l_bar - log( gap->Wk[k-1] );
00126
00127
00128 gap->sk[k-1] = 0;
00129 for( b=0; b<gap->B; b++ ){
00130 gap->sk[k-1] += SQR( log( gap->Wkref[b][k-1] ) - l_bar );
00131 }
00132 gap->sk[k-1] = sqrt( gap->sk[k-1]/(double)gap->B );
00133 gap->sk[k-1] = gap->sk[k-1] * sqrt( 1.0 + 1.0/(double)gap->B );
00134 }
00135
00136
00137 for( k=0; k<=gap->K-1; k++ ){
00138 if( gap->gapdistr[k] >= ((gap->gapdistr[k+1])-(gap->sk[k+1])) ){
00139 gap->khat = k+1;
00140 break;
00141 }
00142 }
00143 if( gap->progress )
00144 gap->progress( PROGRESSBAR_FINISH, 0 );
00145
00146
00147 for( i=0; i<n; i++ ){
00148 free(Xr[i]);
00149 free(Dr[i]);
00150 }
00151 free( Xr );
00152 free( Dr );
00153 }
00154
00155 void gapstat_print( FILE *out, GapStatistic *g ){
00156 int i, j;
00157 double mean;
00158
00159 fprintf(stderr, "GapStatistic:\n"
00160 " K=%i\n"
00161 " B=%i\n"
00162 " khat=%i\n", g->K, g->B, g->khat );
00163 fprintf(stderr, " Gap = [ ");
00164 for(i=0; i<g->K; i++ )
00165 fprintf(stderr, "(%i, %.2f) ", i+1, g->gapdistr[i]);
00166 fprintf(stderr, "]\n");
00167
00168 fprintf(stderr, " sk = [ ");
00169 for(i=0; i<g->K; i++ )
00170 fprintf(stderr, "(%i, %.4f) ", i+1, g->sk[i]);
00171 fprintf(stderr, "]\n");
00172
00173 fprintf(stderr, " Wk = [ ");
00174 for(i=0; i<g->K; i++ )
00175 fprintf(stderr, "(%i, %.4f) ", i+1, g->Wk[i]);
00176 fprintf(stderr, "]\n");
00177
00178 fprintf(stderr, " <Wkref>_B = [ ");
00179 for(i=0; i<g->K; i++ ){
00180 mean = 0;
00181 for(j=0; j<g->B; j++){
00182 mean += g->Wkref[j][i];
00183 }
00184 mean /= (double)g->B;
00185 fprintf(stderr, "(%i, %.4f) ", i+1, mean);
00186 }
00187 fprintf(stderr, "]\n");
00188
00189 }
00190
00196 double** gap_get_reference_distribution_simple( const double **X, int n, int p, double **Xr ){
00197 int i, j;
00198 double minf, maxf;
00199
00200 for( i=0; i<p; i++ ){
00201 minf=DBL_MAX;
00202 maxf=DBL_MIN;
00203 for( j=0; j<n; j++ ){
00204 if( X[j][i]<minf )
00205 minf = X[j][i];
00206 if( X[j][i]>maxf )
00207 maxf = X[j][i];
00208 }
00209
00210 for( j=0; j<n; j++ ){
00211 Xr[j][i] = (drand48()*maxf)+minf;
00212 if( isnan(Xr[j][i]) ){
00213 errprintf(" Xr[%i][%i] is nan\n", j, i);
00214 }
00215 }
00216 }
00217
00218 return Xr;
00219 }
00220
00228 double** gap_get_reference_distribution_svd( const double **X, int n, int p, double **Xr ){
00229 int i, j;
00230 double minf, maxf;
00231 gsl_matrix *gX, *gXd, *V;
00232 gsl_vector *work, *S;
00233
00234
00235 gX = gsl_matrix_alloc( n, p );
00236 gXd= gsl_matrix_alloc( n, p );
00237 V = gsl_matrix_alloc( n, p );
00238 S = gsl_vector_alloc( p );
00239 work=gsl_vector_alloc( p );
00240
00241 for( i=0; i<n; i++ ){
00242 for( j=0; j<p; j++ ){
00243 gsl_matrix_set( gX, i, j, X[i][j] );
00244 }
00245 }
00246 gsl_matrix_memcpy( gXd, gX );
00247
00248 gsl_linalg_SV_decomp( gXd, V, S, work);
00249
00250 gsl_blas_dgemm(CblasNoTrans,
00251 CblasNoTrans,
00252 1.0, gX, V, 0.0, gXd);
00253
00254 for( i=0; i<p; i++ ){
00255 minf=DBL_MAX;
00256 maxf=DBL_MIN;
00257 for( j=0; j<n; j++ ){
00258 if( gsl_matrix_get( gXd, j, i )<minf )
00259 minf = gsl_matrix_get( gXd, j, i );
00260 if( gsl_matrix_get( gXd, j, i )>maxf )
00261 maxf = gsl_matrix_get( gXd, j, i );
00262 }
00263
00264 for( j=0; j<n; j++ ){
00265 gsl_matrix_set( gX, j, i, ((drand48()*maxf)+minf) );
00266 }
00267 }
00268
00269 gsl_matrix_transpose_memcpy( gXd, V );
00270 gsl_blas_dgemm(CblasNoTrans,
00271 CblasNoTrans,
00272 1.0, gX, gXd, 0.0, V);
00273
00274 for( i=0; i<n; i++ ){
00275 for( j=0; j<p; j++ ){
00276 Xr[j][i] = gsl_matrix_get( V, i, j );
00277 }
00278 }
00279
00280 gsl_matrix_free( gX );
00281 gsl_matrix_free( gXd );
00282 gsl_matrix_free( V );
00283 gsl_vector_free( work );
00284 gsl_vector_free( S );
00285
00286 return Xr;
00287 }
00288
00289 #endif