00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "clustering.h"
00022 #include "linalg.h"
00023 #include <time.h>
00024 #include "eeg.h"
00025
00026
00035 Clusters* kmedoids_repeat( const Array *distmat, int K, int repeat ){
00036 Clusters *C;
00037 Clusters *Cnew;
00038 int i;
00039 double ratio, tmp;
00040 bool ismatrix;
00041 matrix_CHECKSQR( ismatrix, distmat );
00042 if( !ismatrix) return NULL;
00043 int N = distmat->size[0];
00044
00045 long initial_seed=(long)time(NULL);
00046 OptArgList *opts=optarglist("seed=long", initial_seed );
00047
00048 C = cluster_init( K, N );
00049 ratio=DBL_MAX;
00050 for( i=0; i<repeat; i++ ){
00051 optarglist_optarg_by_key( opts, "seed" )->data_scalar=initial_seed+i*10;
00052 Cnew = kmedoids( distmat, K, opts );
00053 tmp = 1.0;
00054
00055 tmp = cluster_within_scatter ( distmat, Cnew );
00056 tmp /= cluster_between_scatter( distmat, Cnew );
00057
00058 if( tmp<ratio ){
00059 dprintf("Run %i: %f\n", i, tmp);
00060 ratio=tmp;
00061 cluster_copy( C, Cnew );
00062 }
00063 cluster_free( Cnew );
00064 }
00065 optarglist_free( opts );
00066
00067 return C;
00068 }
00069
00079 Clusters* kmedoids(const Array *distmat, int K, OptArgList *optargs ){
00080 Clusters *C, *Cnew;
00081 int *medoids;
00082 int i, j, k, r, minidx, num_not_changed;
00083 double sum, minsum, mindist, curdist;
00084 bool ismatrix;
00085 matrix_CHECKSQR( ismatrix, distmat );
00086 if( !ismatrix) return NULL;
00087 int N = distmat->size[0];
00088
00089
00090 unsigned long seed=0;
00091 double x;
00092 optarg_PARSE_SCALAR( optargs, "seed", seed, unsigned long, x );
00093
00094 if( K>N ){
00095 errprintf("K>=N: %i>=%i\n", K, N );
00096 return NULL;
00097 }
00098
00099
00100 medoids = (int*) malloc( K*sizeof(int) );
00101 C = cluster_init(K, N);
00102 Cnew = cluster_init(K, N);
00103
00104
00105 const gsl_rng_type * T;
00106 gsl_rng * randgen;
00107 gsl_rng_env_setup();
00108 T = gsl_rng_default;
00109 randgen = gsl_rng_alloc (T);
00110 if( seed==0 )
00111 seed=(unsigned long)time(NULL);
00112 gsl_rng_set (randgen, seed );
00113
00114 Array *permut=array_new2( UINT, 1, K );
00115 for( i=0; i<K; i++ )
00116 array_INDEX1( permut, uint, i )=i;
00117 array_shuffle( permut, seed );
00118 for( i=0; i<N; i++ ){
00119 if( i<K ){
00120 r = array_INDEX1( permut, uint, i );
00121 } else {
00122 r = gsl_rng_uniform_int( randgen, K );
00123 }
00124 C->clust[r][C->n[r]] = i;
00125 (C->n[r])++;
00126 }
00127 array_free( permut );
00128
00129 dprintf("initialized cluster\n");
00130 #ifdef DEBUG
00131 cluster_print(stderr, C);
00132 #endif
00133
00134
00135 num_not_changed = 0;
00136 while(num_not_changed<1){
00137 for( i=0; i<K; i++ ){
00138 minsum = DBL_MAX;
00139 minidx = 0;
00140 for( j=0; j<C->n[i]; j++ ){
00141 sum = 0;
00142 for( k=0; k<C->n[i]; k++){
00143 sum += mat_IDX( distmat, C->clust[i][j], C->clust[i][k] );
00144 }
00145
00146 if( sum<minsum ){
00147 minsum = sum;
00148 minidx = C->clust[i][j];
00149 }
00150 }
00151 medoids[i] = minidx;
00152 }
00153
00154
00155 for( i=0; i<Cnew->K; i++ ){
00156 Cnew->n[i] = 0;
00157 }
00158
00159 for( i=0; i<N; i++ ){
00160 mindist = DBL_MAX;
00161 minidx = 0;
00162 for( j=0; j<K; j++ ){
00163 curdist = mat_IDX( distmat, medoids[j], i );
00164 if( curdist<mindist ){
00165 mindist=curdist;
00166 minidx=j;
00167 }
00168 }
00169
00170 Cnew->clust[minidx][Cnew->n[minidx]] = i;
00171 Cnew->n[minidx] += 1;
00172 }
00173
00174 if(!cluster_compare(C, Cnew))
00175 num_not_changed++;
00176 else
00177 cluster_copy(C, Cnew);
00178 }
00179
00180
00181 free(medoids);
00182 cluster_free(Cnew);
00183 gsl_rng_free (randgen);
00184
00185 return C;
00186 }
00187
00193 int cluster_compare(const Clusters *c1, const Clusters *c2){
00194 int i, j, k, l, found, el;
00195 int c2i, prev_c2i;
00196
00197 if( c1->K != c2->K ) return 1;
00198
00199 for( i=0; i<c1->K; i++ ){
00200 found = 0;
00201 for( j=0; j<c2->K; j++ ){
00202 if(c1->n[i] == c2->n[j]){
00203 found = 1;
00204 break;
00205 }
00206 }
00207 if(!found)
00208 return 1;
00209 }
00210
00211
00212 for( i=0; i<c1->K; i++ ){
00213
00214 prev_c2i = c1->K+1;
00215 c2i = 0;
00216 for( j=0; j<c1->n[i]; j++ ){
00217 el = c1->clust[i][j];
00218
00219 for( k=0; k<c2->K; k++ ){
00220 for( l=0; l<c2->n[k]; l++ ){
00221 if( el == c2->clust[k][l] ){
00222 c2i = k;
00223 if(prev_c2i==c1->K+1)
00224 prev_c2i = k;
00225 }
00226 }
00227 }
00228 if( c2->n[c2i] != c1->n[i] ){
00229
00230 return 1;
00231 }
00232 if(prev_c2i!=c2i){
00233
00234 return 1;
00235 }
00236 }
00237 }
00238 return 0;
00239 }
00240
00242 void cluster_copy(Clusters *dest, const Clusters *src){
00243 int i, j;
00244
00245 dest->K = src->K;
00246 for( i=0; i<src->K; i++ ){
00247 dest->n[i] = src->n[i];
00248 for( j=0; j<src->n[i]; j++ ){
00249 dest->clust[i][j] = src->clust[i][j];
00250 }
00251 }
00252
00253 }
00254
00255 void cluster_free(Clusters *c){
00256 int i;
00257 free( c->n );
00258 for( i=0; i<c->K; i++)
00259 free(c->clust[i]);
00260 free(c->clust);
00261 free(c);
00262 }
00263
00264 Clusters* cluster_init(int K, int maxN){
00265 Clusters *c;
00266 int i;
00267
00268 c = (Clusters*) malloc( sizeof(Clusters) );
00269 c->K=K;
00270 c->n = (int*) malloc( K*sizeof(int) );
00271 c->clust = (int**) malloc( K*sizeof(int*) );
00272 for( i=0; i<K; i++ ){
00273 c->clust[i] = (int*) malloc( maxN*sizeof(int) );
00274 c->n[i] = 0;
00275 }
00276
00277 return c;
00278 }
00279
00280 void cluster_print(FILE *o,const Clusters *c){
00281 int i, j;
00282
00283 fprintf(o, "Cluster with %i partitions\n", c->K);
00284 for( i=0; i<c->K; i++ ){
00285 fprintf(o, " C[%i]=\{ ", i+1);
00286 for( j=0; j<c->n[i]; j++ ){
00287 fprintf( o, "%i ", c->clust[i][j] );
00288 }
00289 fprintf( o, "}\n" );
00290 }
00291 fprintf( o, "\n" );
00292 }
00293
00294 #if 0
00295
00300 GapStatistic* gapstat_init( GapStatistic *g, int K, int B ){
00301 int i;
00302
00303 if( g==ALLOC_IN_FCT ){
00304 g = (GapStatistic*)malloc( sizeof(GapStatistic) );
00305 }
00306 g->K = K;
00307 g->B = B;
00308 g->khat = -1;
00309 dprintf("allocating memory\n");
00310 g->gapdistr = (double*)malloc( K*sizeof(double) );
00311 g->sk = (double*)malloc( K*sizeof(double) );
00312 g->Wk= (double*)malloc( K*sizeof(double) );
00313 g->Wkref= (double**) malloc( B*sizeof(double*) );
00314 for( i=0; i<B; i++ )
00315 g->Wkref[i] = (double*) malloc( K*sizeof(double) );
00316 g->progress=NULL;
00317
00318 return g;
00319 }
00320
00321 void gapstat_free( GapStatistic *g ){
00322 int i;
00323 free( g->Wk );
00324 for( i=0; i<g->B; i++ )
00325 free( g->Wkref[i] );
00326 free( g->Wkref );
00327 free( g->gapdistr );
00328 free( g->sk );
00329 free( g );
00330 }
00331
00339 void gapstat_calculate( GapStatistic *gap, double **X, int n, int p,
00340 VectorDistanceFunction distfunction, const double** D ){
00341 int i, k, b;
00342 Clusters *C;
00343 double **Xr;
00344 double **Dr;
00345 double l_bar;
00346
00347
00348 Xr = (double**) malloc( n*sizeof( double* ) );
00349 Dr = (double**) malloc( n*sizeof( double* ) );
00350 for( i=0; i<n; i++ ){
00351 Xr[i] = (double*) malloc( p*sizeof( double ) );
00352 Dr[i] = (double*) malloc( n*sizeof( double ) );
00353 }
00354
00355 if( gap->progress ){
00356 gap->progress( PROGRESSBAR_INIT, (gap->B+1)*gap->K );
00357 }
00358
00359
00360 for( k=1; k<=gap->K; k++ ){
00361 if( gap->progress )
00362 gap->progress( PROGRESSBAR_CONTINUE_LONG, k );
00363 C = kmedoids_repeat( D, n, k, 50 );
00364
00365 gap->Wk[k-1] = get_within_scatter( D, n, C );
00366 cluster_free( C );
00367 }
00368
00369 for( b=0; b<gap->B; b++ ){
00370 for( k=1; k<=gap->K; k++ ){
00371 if( gap->progress ){
00372 gap->progress( PROGRESSBAR_CONTINUE_LONG, (b+1)*gap->K+k );
00373 }
00374
00375 Xr = gap_get_reference_distribution_simple( (const double **) X, n, p, Xr );
00376 Dr = vectordist_distmatrix( distfunction, (const double**)Xr, n, p, Dr, NULL, NULL );
00377 C = kmedoids_repeat( (const double**)Dr, n, k, 50 );
00378
00379 gap->Wkref[b][k-1] = get_within_scatter( (const double**)Dr, n, C );
00380 cluster_free( C );
00381 }
00382 }
00383
00384 for( k=1; k<=gap->K; k++ ){
00385 gap->gapdistr[k-1]=0;
00386 for( b=0; b<gap->B; b++ ){
00387 gap->gapdistr[k-1] += log( gap->Wkref[b][k-1] );
00388 }
00389 l_bar = ((gap->gapdistr[k-1])/(double)gap->B);
00390 gap->gapdistr[k-1] = l_bar - log( gap->Wk[k-1] );
00391
00392
00393 gap->sk[k-1] = 0;
00394 for( b=0; b<gap->B; b++ ){
00395 gap->sk[k-1] += SQR( log( gap->Wkref[b][k-1] ) - l_bar );
00396 }
00397 gap->sk[k-1] = sqrt( gap->sk[k-1]/(double)gap->B );
00398 gap->sk[k-1] = gap->sk[k-1] * sqrt( 1.0 + 1.0/(double)gap->B );
00399 }
00400
00401
00402 for( k=0; k<=gap->K-1; k++ ){
00403 if( gap->gapdistr[k] >= ((gap->gapdistr[k+1])-(gap->sk[k+1])) ){
00404 gap->khat = k+1;
00405 break;
00406 }
00407 }
00408 if( gap->progress )
00409 gap->progress( PROGRESSBAR_FINISH, 0 );
00410
00411
00412 for( i=0; i<n; i++ ){
00413 free(Xr[i]);
00414 free(Dr[i]);
00415 }
00416 free( Xr );
00417 free( Dr );
00418 }
00419
00420 void gapstat_print( FILE *out, GapStatistic *g ){
00421 int i, j;
00422 double mean;
00423
00424 fprintf(stderr, "GapStatistic:\n"
00425 " K=%i\n"
00426 " B=%i\n"
00427 " khat=%i\n", g->K, g->B, g->khat );
00428 fprintf(stderr, " Gap = [ ");
00429 for(i=0; i<g->K; i++ )
00430 fprintf(stderr, "(%i, %.2f) ", i+1, g->gapdistr[i]);
00431 fprintf(stderr, "]\n");
00432
00433 fprintf(stderr, " sk = [ ");
00434 for(i=0; i<g->K; i++ )
00435 fprintf(stderr, "(%i, %.4f) ", i+1, g->sk[i]);
00436 fprintf(stderr, "]\n");
00437
00438 fprintf(stderr, " Wk = [ ");
00439 for(i=0; i<g->K; i++ )
00440 fprintf(stderr, "(%i, %.4f) ", i+1, g->Wk[i]);
00441 fprintf(stderr, "]\n");
00442
00443 fprintf(stderr, " <Wkref>_B = [ ");
00444 for(i=0; i<g->K; i++ ){
00445 mean = 0;
00446 for(j=0; j<g->B; j++){
00447 mean += g->Wkref[j][i];
00448 }
00449 mean /= (double)g->B;
00450 fprintf(stderr, "(%i, %.4f) ", i+1, mean);
00451 }
00452 fprintf(stderr, "]\n");
00453
00454 }
00455
00461 double** gap_get_reference_distribution_simple( const double **X, int n, int p, double **Xr ){
00462 int i, j;
00463 double minf, maxf;
00464
00465 for( i=0; i<p; i++ ){
00466 minf=DBL_MAX;
00467 maxf=DBL_MIN;
00468 for( j=0; j<n; j++ ){
00469 if( X[j][i]<minf )
00470 minf = X[j][i];
00471 if( X[j][i]>maxf )
00472 maxf = X[j][i];
00473 }
00474
00475 for( j=0; j<n; j++ ){
00476 Xr[j][i] = (drand48()*maxf)+minf;
00477 if( isnan(Xr[j][i]) ){
00478 errprintf(" Xr[%i][%i] is nan\n", j, i);
00479 }
00480 }
00481 }
00482
00483 return Xr;
00484 }
00485
00493 double** gap_get_reference_distribution_svd( const double **X, int n, int p, double **Xr ){
00494 int i, j;
00495 double minf, maxf;
00496 gsl_matrix *gX, *gXd, *V;
00497 gsl_vector *work, *S;
00498
00499
00500 gX = gsl_matrix_alloc( n, p );
00501 gXd= gsl_matrix_alloc( n, p );
00502 V = gsl_matrix_alloc( n, p );
00503 S = gsl_vector_alloc( p );
00504 work=gsl_vector_alloc( p );
00505
00506 for( i=0; i<n; i++ ){
00507 for( j=0; j<p; j++ ){
00508 gsl_matrix_set( gX, i, j, X[i][j] );
00509 }
00510 }
00511 gsl_matrix_memcpy( gXd, gX );
00512
00513 gsl_linalg_SV_decomp( gXd, V, S, work);
00514
00515 gsl_blas_dgemm(CblasNoTrans,
00516 CblasNoTrans,
00517 1.0, gX, V, 0.0, gXd);
00518
00519 for( i=0; i<p; i++ ){
00520 minf=DBL_MAX;
00521 maxf=DBL_MIN;
00522 for( j=0; j<n; j++ ){
00523 if( gsl_matrix_get( gXd, j, i )<minf )
00524 minf = gsl_matrix_get( gXd, j, i );
00525 if( gsl_matrix_get( gXd, j, i )>maxf )
00526 maxf = gsl_matrix_get( gXd, j, i );
00527 }
00528
00529 for( j=0; j<n; j++ ){
00530 gsl_matrix_set( gX, j, i, ((drand48()*maxf)+minf) );
00531 }
00532 }
00533
00534 gsl_matrix_transpose_memcpy( gXd, V );
00535 gsl_blas_dgemm(CblasNoTrans,
00536 CblasNoTrans,
00537 1.0, gX, gXd, 0.0, V);
00538
00539 for( i=0; i<n; i++ ){
00540 for( j=0; j<p; j++ ){
00541 Xr[j][i] = gsl_matrix_get( V, i, j );
00542 }
00543 }
00544
00545 gsl_matrix_free( gX );
00546 gsl_matrix_free( gXd );
00547 gsl_matrix_free( V );
00548 gsl_vector_free( work );
00549 gsl_vector_free( S );
00550
00551 return Xr;
00552 }
00553
00554 #endif
00555
00567 double cluster_within_scatter (const Array *distmat, const Clusters *c){
00568 double *sums;
00569 double W;
00570 int i, j, k;
00571 bool ismatrix;
00572 matrix_CHECKSQR( ismatrix, distmat );
00573 if( !ismatrix) return -1;
00574
00575
00576 sums = (double*) malloc( c->K*sizeof(double) );
00577 W = 0;
00578 int numcomp=0;
00579 for( i=0; i<c->K; i++ ){
00580 sums[i] = 0;
00581 for( j=0; j<(c->n[i])-1; j++ ){
00582 for( k=j+1; k<c->n[i]; k++){
00583 sums[i] += mat_IDX( distmat, c->clust[i][j], c->clust[i][k]);
00584 numcomp++;
00585 }
00586 }
00587
00588
00589 W += sums[i];
00590 }
00591 W /= (double)2*numcomp;
00592
00593 free(sums);
00594
00595 return W;
00596 }
00597
00604 double cluster_between_scatter(const Array *distmat, const Clusters *c){
00605 double W;
00606 int k1, k2, i, j;
00607 int num_comp;
00608 bool ismatrix;
00609 matrix_CHECKSQR( ismatrix, distmat );
00610 if( !ismatrix) return -1;
00611
00612 W = 0.0;
00613 num_comp=0;
00614 for( k1=0; k1<c->K-1; k1++ ){
00615 for( k2=k1+1; k2<c->K; k2++ ){
00616 for( i=0; i<c->n[k1]; i++ ){
00617 for( j=0; j<c->n[k2]; j++ ){
00618 W += mat_IDX( distmat, c->clust[k1][i], c->clust[k2][j] );
00619 num_comp++;
00620 }
00621 }
00622 }
00623 }
00624 W /= (double)2*num_comp;
00625
00626 return W;
00627 }
00628
00629
00640 Dendrogram* agglomerative_clustering(const Array *distmat, LinkageFunction distfct){
00641 Dendrogram **nodes, *tmp;
00642 double min_d, cur_d=0.0;
00643 int min_i=0, min_j=0;
00644 int num_nodes;
00645 int i, j;
00646 bool ismatrix;
00647 matrix_CHECKSQR( ismatrix, distmat );
00648 if( !ismatrix) return NULL;
00649 int N=distmat->size[1];
00650
00651 nodes = (Dendrogram**) malloc( N*sizeof( Dendrogram* ) );
00652
00653 int clustidx=0;
00654 for( i=0; i<N; i++ ){
00655 nodes[i] = dgram_init( i, NULL, NULL );
00656 nodes[i]->clustnum=clustidx++;
00657 }
00658 num_nodes = N;
00659
00660 while( num_nodes > 1 ){
00661 dprintf("nmodes = %i\n", num_nodes );
00662 min_d = DBL_MAX;
00663 for( i=0; i<num_nodes-1; i++ ){
00664 for( j=i+1; j<num_nodes; j++ ){
00665 cur_d = distfct( distmat, nodes[i], nodes[j] );
00666 if( cur_d<=min_d ){
00667 min_d=cur_d;
00668 min_i=i;
00669 min_j=j;
00670 }
00671 }
00672 }
00673
00674
00675 tmp = dgram_init( -1, nodes[min_i], nodes[min_j] );
00676 tmp->height=min_d;
00677 tmp->clustnum=clustidx++;
00678 nodes[min_i] = tmp;
00679 tmp = nodes[num_nodes-1];
00680 nodes[min_j] = tmp;
00681 num_nodes--;
00682 dprintf("Linking (%i,%i), height=%f\n", min_i, min_j, min_d );
00683 }
00684 tmp = nodes[0];
00685 free( nodes );
00686 return tmp;
00687 }
00688
00690 void _dgram_walk_matlab( const Dendrogram *t, Array *a ){
00691 if( t->val>=0 ){
00692 return;
00693 }
00694 mat_IDX( a, t->clustnum, 0 ) = t->left->clustnum+1;
00695 mat_IDX( a, t->clustnum, 1 ) = t->right->clustnum+1;
00696 mat_IDX( a, t->clustnum, 2 ) = t->height;
00697 _dgram_walk_matlab( t->left, a );
00698 _dgram_walk_matlab( t->right, a );
00699 }
00740 Array* dgram_to_matlab( const Dendrogram *dgram ){
00741 int N=dgram_num_leaves( dgram );
00742 Array *mat=array_new2( DOUBLE, 2, N+N-1, 3 );
00743 _dgram_walk_matlab( dgram, mat );
00744 char buf[200];
00745 sprintf( buf, "%i-%i,:", N, N+N-2 );
00746 Array *rmat=array_slice( mat, buf );
00747 array_free(mat);
00748 return rmat;
00749 }
00750
00754 Dendrogram* dgram_init(int val, Dendrogram *left, Dendrogram *right ){
00755 Dendrogram *c;
00756 c = (Dendrogram*) malloc( sizeof(Dendrogram) );
00757 c->val = val;
00758 c->height=-1.0;
00759 c->clustnum=0;
00760 c->left = left;
00761 c->right= right;
00762 return c;
00763 }
00764
00765
00768 void dgram_free(Dendrogram *t){
00769 if( t->right==NULL && t->left==NULL ){
00770 free( t );
00771 return;
00772 } else {
00773 if( t->right!=NULL ){
00774 dgram_free( t->right );
00775 }
00776 if( t->left!=NULL ){
00777 dgram_free( t->left );
00778 }
00779 }
00780 }
00782 void _dgram_preorder_recursive( const Dendrogram *t, int *vals, int *n ){
00783 if( t->left==NULL && t->right==NULL ){
00784 if( t->val<0 ){
00785 errprintf("t->val<0 (%i) even though it's a terminal node\n", t->val);
00786 return;
00787 }
00788 vals[(*n)++] = t->val;
00789 return;
00790 }
00791 if( t->left!=NULL ){
00792
00793 _dgram_preorder_recursive( t->left, vals, n );
00794 }
00795 if( t->right!=NULL ){
00796 _dgram_preorder_recursive( t->right, vals, n );
00797 }
00798 return;
00799 }
00810 void dgram_preorder( const Dendrogram *t, int *vals, int *n ){
00811 int m;
00812
00813 m = 0;
00814
00815
00816 _dgram_preorder_recursive( t, vals, &m );
00817
00818
00819
00820
00821
00822
00823
00824 *n = m;
00825
00826 return;
00827 }
00833 double dgram_dist_singlelinkage (const Array *d, const Dendrogram *c1, const Dendrogram *c2){
00834 double dist;
00835 int *el1, *el2;
00836 int n1, n2;
00837 int i, j;
00838 bool ismatrix;
00839 matrix_CHECK( ismatrix,d );
00840 if( !ismatrix ) return -1;
00841 if( d->size[0]!=d->size[1] ){
00842 errprintf("distmat must be N x N, got (%i,%i)\n",d->size[0],d->size[1]);
00843 return -1;
00844 }
00845 int N = d->size[0];
00846
00847 dist = DBL_MAX;
00848
00849 el1 = (int*) malloc( N*sizeof( int ) );
00850 el2 = (int*) malloc( N*sizeof( int ) );
00851
00852 dgram_preorder( c1, el1, &n1 );
00853 dgram_preorder( c2, el2, &n2 );
00854
00855 for( i=0; i<n1; i++ ){
00856 for( j=0; j<n2; j++ ){
00857 if( el1[i]<0 || el1[i]>=N || el2[j]<0 || el2[j]>=N ){
00858 errprintf("something's wrong, el[i] not in range 0-%i\n", N);
00859 return -1;
00860 }
00861 if( mat_IDX( d, el1[i], el2[j]) < dist ){
00862 dist = mat_IDX( d, el1[i], el2[j]);
00863 }
00864 }
00865 }
00866
00867
00868
00869 free(el1);
00870 free(el2);
00871 return dist;
00872 }
00873
00879 double dgram_dist_completelinkage(const Array *d, const Dendrogram *c1, const Dendrogram *c2){
00880 double dist;
00881 int *el1, *el2;
00882 int n1, n2;
00883 int i, j;
00884 bool ismatrix;
00885 matrix_CHECK( ismatrix,d );
00886 if( !ismatrix ) return -1;
00887 if( d->size[0]!=d->size[1] ){
00888 errprintf("distmat must be N x N, got (%i,%i)\n",d->size[0],d->size[1]);
00889 return -1;
00890 }
00891 int N = d->size[0];
00892
00893 dist = DBL_MIN;
00894
00895 el1 = (int*) malloc( N*sizeof( int ) );
00896 el2 = (int*) malloc( N*sizeof( int ) );
00897
00898 dgram_preorder( c1, el1, &n1 );
00899 dgram_preorder( c2, el2, &n2 );
00900
00901 for( i=0; i<n1; i++ ){
00902 for( j=0; j<n2; j++ ){
00903 if( el1[i]<0 || el1[i]>=N || el2[j]<0 || el2[j]>=N ){
00904 errprintf("something's wrong, el[i] not in range 0-%i\n", N);
00905 return -1;
00906 }
00907 if( mat_IDX( d, el1[i], el2[j] )>dist ){
00908 dist = mat_IDX( d, el1[i], el2[j]);
00909 }
00910 }
00911 }
00912
00913
00914
00915 free(el1);
00916 free(el2);
00917 return dist;
00918 }
00919
00920
00922 #define END_NODE 0
00923 #define INTERMEDIATE_NODE 1
00924 #define NULL_NODE 2
00925 int _dgram_get_deepest_recursive( Dendrogram *c, Dendrogram **candidate, int *depth, int curdepth ){
00926 int status_left=NULL_NODE;
00927 int status_right=NULL_NODE;
00928
00929 if( c->left==NULL && c->right==NULL ){
00930 return END_NODE;
00931 }
00932
00933 if( c->left!=NULL ){
00934 status_left = _dgram_get_deepest_recursive( c->left, candidate, depth, curdepth+1 );
00935 }
00936 if( c->right!=NULL ){
00937 status_right = _dgram_get_deepest_recursive( c->right, candidate, depth, curdepth+1 );
00938 }
00939
00940 if( status_left==END_NODE && status_right==END_NODE ){
00941
00942 if( curdepth>=*depth ){
00943 *candidate = c;
00944 *depth = curdepth;
00945 }
00946 dprintf("END_NODE visited, cd=%i, d=%i, c=%p, candidate=%p\n", curdepth, *depth, c, candidate);
00947 }
00948
00949 return INTERMEDIATE_NODE;
00950 }
00956 Dendrogram* dgram_get_deepest( Dendrogram *c ){
00957 Dendrogram **d, *tmp;
00958 int depth;
00959 d = (Dendrogram **) malloc( sizeof( Dendrogram* ) );
00960
00961 depth = 0;
00962 _dgram_get_deepest_recursive( c, d, &depth, 0 );
00963 dprintf("deepest=%p, depth=%i\n", *d, depth);
00964 tmp = *d;
00965 free(d);
00966 return tmp;
00967 }
00968
00969
00971 void _dgram_print_recursive( Dendrogram *t, int level ){
00972 int i;
00973
00974 fprintf( stdout, "'%p(%4i,%.2f,%4i)' - ", t, t->clustnum, t->height, t->val );
00975 if( t->left ){
00976 _dgram_print_recursive( t->left, level+1 );
00977 }
00978
00979 fprintf( stdout, "\n" );
00980 for( i=0; i<level; i++)
00981 fprintf( stdout, " " );
00982
00983 if( t->right ){
00984 _dgram_print_recursive( t->right, level+1 );
00985 }
00986 }
00989 void dgram_print( Dendrogram *t ){
00990 _dgram_print_recursive( t, 0 );
00991 fprintf( stdout, "\n");
00992 }
00993
00996 int dgram_num_leaves( const Dendrogram *t ){
00997 int r=0;
00998 if( t==NULL ){
00999 return 0;
01000 } else if( t->val!=-1 ){
01001 r=1;
01002 }
01003 r += dgram_num_leaves( t->left );
01004 r += dgram_num_leaves( t->right );
01005 return r;
01006 }
01007
01008 void dgram_print_node( Dendrogram *t ){
01009 fprintf( stdout, "t=%p, val=%i, height=%f, left=%p, right=%p\n", t, t->val, t->height, t->left, t->right );
01010 }
01011 #undef END_NODE
01012 #undef INTERMEDIATE_NODE
01013 #undef NULL_NODE
01014
01015 #if 0
01016
01026 int eeg_best_num_clusters_gapstat( const EEG *eeg, VectorDistanceFunction distfunction, OptArgList *optargs ){
01027 GapStatistic *gap;
01028 double x;
01029 int max_num_clusters;
01030 int num_ref_dist;
01031 EEG *meaneeg;
01032 double **D;
01033 int bestclust=0;
01034
01035 max_num_clusters = 10;
01036 num_ref_dist = eeg->ntrials;
01037 if( optarglist_has_key( optargs, "max_num_clusters" ) ){
01038 x = optarglist_scalar_by_key( optargs, "max_num_clusters" );
01039 if( !isnan( x ) ) max_num_clusters=(int)x;
01040 }
01041 if( optarglist_has_key( optargs, "num_ref_dist" ) ){
01042 x = optarglist_scalar_by_key( optargs, "num_ref_dist" );
01043 if( !isnan( x ) ) num_ref_dist=(int)x;
01044 }
01045
01046 meaneeg = eeg_average_channels( eeg );
01047 eeg_print( stderr, meaneeg, 3 );
01048 D = eeg_distmatrix( meaneeg, distfunction, ALLOC_IN_FCT, optargs );
01049
01050 gap = gapstat_init( NULL, max_num_clusters, num_ref_dist );
01051 gapstat_calculate( gap, meaneeg->data[0], meaneeg->ntrials, meaneeg->n,
01052 distfunction, (const double**)D );
01053
01054 bestclust = gap->khat;
01055 #ifdef DEBUG
01056 gapstat_print( stderr, gap );
01057 #endif
01058
01059 dblpp_free( D, meaneeg->ntrials );
01060 eeg_free( meaneeg );
01061 gapstat_free( gap );
01062
01063 return bestclust;
01064 }
01065 #endif