• API Main Page
  • Documentation
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

src/gapstat.c

Go to the documentation of this file.
00001 /***************************************************************************
00002  *   Copyright (C) 2008 by Matthias Ihrke   *
00003  *   mihrke@uni-goettingen.de   *
00004  *                                                                         *
00005  *   This program is free software; you can redistribute it and/or modify  *
00006  *   it under the terms of the GNU General Public License as published by  *
00007  *   the Free Software Foundation; either version 2 of the License, or     *
00008  *   (at your option) any later version.                                   *
00009  *                                                                         *
00010  *   This program is distributed in the hope that it will be useful,       *
00011  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
00012  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
00013  *   GNU General Public License for more details.                          *
00014  *                                                                         *
00015  *   You should have received a copy of the GNU General Public License     *
00016  *   along with this program; if not, write to the                         *
00017  *   Free Software Foundation, Inc.,                                       *
00018  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
00019  ***************************************************************************/
00020 
00021 #include "gapstat.h"
00022 #include "linalg.h"
00023 #include "clustering.h"
00024 #include <time.h>
00025 #include "eeg.h"
00026 
00027 
00028 #ifdef LIBEEGTOOLS_FIXME
00029 
00035 GapStatistic* gapstat_init( GapStatistic *g, int K, int B ){
00036   int i;
00037 
00038   if( g==ALLOC_IN_FCT ){
00039      g = (GapStatistic*)malloc( sizeof(GapStatistic) );
00040   }
00041   g->K = K;
00042   g->B = B;
00043   g->khat = -1;
00044   dprintf("allocating memory\n");
00045   g->gapdistr = (double*)malloc( K*sizeof(double) );
00046   g->sk = (double*)malloc( K*sizeof(double) );
00047   g->Wk= (double*)malloc( K*sizeof(double) );
00048   g->Wkref= (double**) malloc( B*sizeof(double*) );
00049   for( i=0; i<B; i++ )
00050      g->Wkref[i] = (double*) malloc( K*sizeof(double) );
00051   g->progress=NULL;
00052 
00053   return g;
00054 }
00055 
00056 void gapstat_free( GapStatistic *g ){
00057   int i;
00058   free( g->Wk );
00059   for( i=0; i<g->B; i++ )
00060      free( g->Wkref[i] );
00061   free( g->Wkref );
00062   free( g->gapdistr );
00063   free( g->sk );
00064   free( g );
00065 }
00066 
00074 void gapstat_calculate( GapStatistic *gap, double **X, int n, int p, 
00075                                 VectorDistanceFunction distfunction, const double** D ){
00076   int i, k, b;
00077   Clusters *C;
00078   double **Xr; /* reference distribution */
00079   double **Dr; /* distance in ref dist */
00080   double l_bar; /* (1/B sum_b( log(Wkref[b]) )) */
00081 
00082   /* init */
00083   Xr = (double**) malloc( n*sizeof( double* ) );
00084   Dr = (double**) malloc( n*sizeof( double* ) );
00085   for( i=0; i<n; i++ ){
00086      Xr[i] = (double*) malloc( p*sizeof( double ) );
00087      Dr[i] = (double*) malloc( n*sizeof( double ) );
00088   }
00089 
00090   if( gap->progress ){
00091      gap->progress( PROGRESSBAR_INIT, (gap->B+1)*gap->K );
00092   }
00093 
00094   /* calculation */
00095   for( k=1; k<=gap->K; k++ ){ /* data within-scatter */
00096      if( gap->progress )
00097         gap->progress( PROGRESSBAR_CONTINUE_LONG, k );
00098      C = kmedoids_repeat( D, n, k, 50 );
00099      //  cluster_print( stderr, C );
00100      gap->Wk[k-1] = get_within_scatter( D, n, C );
00101      cluster_free( C );
00102   }
00103 
00104   for( b=0; b<gap->B; b++ ){ /* monte Carlo for ref-data*/
00105      for( k=1; k<=gap->K; k++ ){
00106         if( gap->progress ){
00107           gap->progress( PROGRESSBAR_CONTINUE_LONG, (b+1)*gap->K+k );
00108         }
00109         
00110         Xr = gap_get_reference_distribution_simple( (const double **) X, n, p, Xr );
00111         Dr = vectordist_distmatrix( distfunction, (const double**)Xr, n, p, Dr, NULL, NULL );
00112         C = kmedoids_repeat( (const double**)Dr, n, k, 50 );
00113         //      cluster_print(stderr, C );
00114         gap->Wkref[b][k-1] = get_within_scatter( (const double**)Dr, n, C );
00115         cluster_free( C );
00116      }
00117   }
00118   
00119   for( k=1; k<=gap->K; k++ ){ /* gap distr */
00120      gap->gapdistr[k-1]=0;
00121      for( b=0; b<gap->B; b++ ){
00122         gap->gapdistr[k-1] += log( gap->Wkref[b][k-1] );
00123      }
00124      l_bar = ((gap->gapdistr[k-1])/(double)gap->B);
00125      gap->gapdistr[k-1] = l_bar - log( gap->Wk[k-1] );
00126 
00127      /* need std and sk */
00128      gap->sk[k-1] = 0;
00129      for( b=0; b<gap->B; b++ ){
00130         gap->sk[k-1] += SQR( log( gap->Wkref[b][k-1] ) - l_bar );
00131      }
00132      gap->sk[k-1] = sqrt( gap->sk[k-1]/(double)gap->B );
00133      gap->sk[k-1] = gap->sk[k-1] * sqrt( 1.0 + 1.0/(double)gap->B );
00134   }
00135   
00136   /* find best k */
00137   for( k=0; k<=gap->K-1; k++ ){
00138      if( gap->gapdistr[k] >= ((gap->gapdistr[k+1])-(gap->sk[k+1])) ){
00139         gap->khat = k+1;
00140         break;
00141      }
00142   }
00143   if( gap->progress )
00144      gap->progress( PROGRESSBAR_FINISH, 0 );
00145 
00146   /* free */
00147   for( i=0; i<n; i++ ){
00148      free(Xr[i]);
00149      free(Dr[i]);
00150   }
00151   free( Xr );
00152   free( Dr );
00153 }
00154 
00155 void gapstat_print( FILE *out, GapStatistic *g ){
00156   int i, j;
00157   double mean; 
00158 
00159   fprintf(stderr, "GapStatistic:\n"
00160              " K=%i\n"
00161              " B=%i\n"
00162              " khat=%i\n", g->K, g->B, g->khat );
00163   fprintf(stderr, " Gap       = [ ");
00164   for(i=0; i<g->K; i++ )
00165      fprintf(stderr, "(%i, %.2f) ", i+1, g->gapdistr[i]);
00166   fprintf(stderr, "]\n");
00167 
00168   fprintf(stderr, " sk        = [ ");
00169   for(i=0; i<g->K; i++ )
00170      fprintf(stderr, "(%i, %.4f) ", i+1, g->sk[i]);
00171   fprintf(stderr, "]\n");
00172 
00173   fprintf(stderr, " Wk        = [ ");
00174   for(i=0; i<g->K; i++ )
00175      fprintf(stderr, "(%i, %.4f) ", i+1, g->Wk[i]);
00176   fprintf(stderr, "]\n");
00177 
00178   fprintf(stderr, " <Wkref>_B = [ ");
00179   for(i=0; i<g->K; i++ ){
00180      mean = 0;
00181      for(j=0; j<g->B; j++){
00182         mean += g->Wkref[j][i];
00183      }
00184      mean /= (double)g->B;
00185      fprintf(stderr, "(%i, %.4f) ", i+1, mean);
00186   }
00187   fprintf(stderr, "]\n");
00188 
00189 }
00190 
00196 double** gap_get_reference_distribution_simple( const double **X, int n, int p, double **Xr ){
00197   int i, j;
00198   double minf, maxf;
00199 
00200   for( i=0; i<p; i++ ){ /* features */
00201      minf=DBL_MAX;
00202      maxf=DBL_MIN;
00203      for( j=0; j<n; j++ ){        /* find min/max for feature i */
00204         if( X[j][i]<minf )
00205           minf = X[j][i];
00206         if( X[j][i]>maxf )
00207           maxf = X[j][i];
00208      }
00209      
00210      for( j=0; j<n; j++ ){ /* uniform random values in [minf,maxf] */
00211         Xr[j][i] = (drand48()*maxf)+minf;
00212         if( isnan(Xr[j][i]) ){
00213           errprintf(" Xr[%i][%i] is nan\n", j, i);
00214         }
00215      }
00216   }
00217 
00218   return Xr;
00219 }
00220 
00228 double** gap_get_reference_distribution_svd( const double **X, int n, int p, double **Xr ){
00229   int i, j;
00230   double minf, maxf;
00231   gsl_matrix *gX, *gXd, *V;
00232   gsl_vector *work, *S;
00233 
00234   /* convert to gsl */
00235   gX = gsl_matrix_alloc( n, p );
00236   gXd= gsl_matrix_alloc( n, p );
00237   V  = gsl_matrix_alloc( n, p );
00238   S  = gsl_vector_alloc( p );
00239   work=gsl_vector_alloc( p );
00240 
00241   for( i=0; i<n; i++ ){
00242      for( j=0; j<p; j++ ){
00243         gsl_matrix_set( gX, i, j, X[i][j] );
00244      }
00245   }
00246   gsl_matrix_memcpy( gXd, gX );
00247 
00248   gsl_linalg_SV_decomp( gXd, V, S, work);
00249 
00250   gsl_blas_dgemm(CblasNoTrans, /* matrix mult */
00251                       CblasNoTrans,
00252                       1.0, gX, V, 0.0, gXd);
00253 
00254   for( i=0; i<p; i++ ){ /* features */
00255      minf=DBL_MAX;
00256      maxf=DBL_MIN;
00257      for( j=0; j<n; j++ ){        /* find min/max for feature i */
00258         if( gsl_matrix_get( gXd, j, i )<minf )
00259           minf = gsl_matrix_get( gXd, j, i );
00260         if( gsl_matrix_get( gXd, j, i )>maxf )
00261           maxf = gsl_matrix_get( gXd, j, i );
00262      }
00263      
00264      for( j=0; j<n; j++ ){ /* uniform random values in [minf,maxf] */
00265         gsl_matrix_set( gX, j, i, ((drand48()*maxf)+minf) );
00266      }
00267   }
00268 
00269   gsl_matrix_transpose_memcpy( gXd, V );
00270   gsl_blas_dgemm(CblasNoTrans,
00271                       CblasNoTrans,
00272                       1.0, gX, gXd, 0.0, V);
00273 
00274   for( i=0; i<n; i++ ){
00275      for( j=0; j<p; j++ ){
00276         Xr[j][i] = gsl_matrix_get( V, i, j );
00277      }
00278   }
00279 
00280   gsl_matrix_free( gX );
00281   gsl_matrix_free( gXd );
00282   gsl_matrix_free( V );
00283   gsl_vector_free( work );
00284   gsl_vector_free( S );
00285 
00286   return Xr;
00287 }
00288 
00289 #endif

Generated on Fri Jun 25 2010 14:10:18 for libeegtools by  doxygen 1.7.0