SHOGUN  v3.1.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KMeans.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 2007-2009 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _KMEANS_H__
13 #define _KMEANS_H__
14 
15 #include <stdio.h>
16 #include <shogun/lib/common.h>
17 #include <shogun/io/SGIO.h>
21 
22 namespace shogun
23 {
24 class CDistanceMachine;
25 
/** KMeans clustering, partitions the data into k (a-priori specified) clusters.
 *
 * Derives from CDistanceMachine. Training does not require labels
 * (train_require_labels() returns false).
 *
 * NOTE(review): this listing skips several source line numbers, and the
 * member index of this page names declarations that are absent here
 * (get_classifier_type(), get_max_iter(), get_radiuses(),
 * get_cluster_centers(), fixed_centers, R, mus_initial). Treat this as an
 * incomplete view of the class and consult the original header before editing.
 */
41 class CKMeans : public CDistanceMachine
42 {
43  public:
 /** default constructor */
45  CKMeans();
46 
 /** constructor
  *
  * @param k presumably the k parameter in KMeans (number of clusters) - confirm
  * @param d distance object; how it is used is not visible in this header
  */
52  CKMeans(int32_t k, CDistance* d);
53 
 /** constructor that additionally takes a matrix of centers
  *
  * @param k_i presumably the k parameter in KMeans - confirm
  * @param d_i distance object
  * @param centers_i presumably the initial cluster centers
  *        (cf. set_initial_centers()) - confirm against KMeans.cpp
  */
59  CKMeans(int32_t k_i, CDistance* d_i, SGMatrix<float64_t> centers_i );
60  virtual ~CKMeans();
61 
62 
64 
65 
70 
 /** load from a C stdio stream
  *
  * @param srcfile stream to read from
  * @return bool; presumably true on success - confirm against KMeans.cpp
  */
76  virtual bool load(FILE* srcfile);
77 
 /** save to a C stdio stream
  *
  * @param dstfile stream to write to
  * @return bool; presumably true on success - confirm against KMeans.cpp
  */
83  virtual bool save(FILE* dstfile);
84 
 /** setter for k
  *
  * @param p_k new value; presumably stored in the k member - confirm
  */
89  void set_k(int32_t p_k);
90 
 /** getter for k
  *
  * @return presumably the k parameter in KMeans - confirm
  */
95  int32_t get_k();
96 
 /** setter for the fixed-centers flag
  *
  * @param fixed presumably whether to keep cluster centers fixed or not - confirm
  */
101  void set_fixed_centers(bool fixed);
102 
 /** getter for the fixed-centers flag
  *
  * @return presumably whether cluster centers are kept fixed - confirm
  */
107  bool get_fixed_centers();
108 
 /** setter for the iteration limit
  *
  * @param iter presumably the maximum number of iterations - confirm
  */
113  void set_max_iter(int32_t iter);
114 
120 
126 
132 
 /** getter for the dimensionality
  *
  * @return presumably the number of dimensions - confirm
  */
137  int32_t get_dimensions();
138 
 /** @return object name, the string "KMeans" */
140  virtual const char* get_name() const { return "KMeans"; }
141 
 /** supply initial centers
  *
  * @param centers matrix of centers; layout (one center per row or per
  *        column) is not visible in this header - confirm
  */
146  virtual void set_initial_centers(SGMatrix<float64_t> centers);
147 
148  protected:
 /** train the machine
  *
  * @param data features to train on; defaults to NULL (what a NULL
  *        argument means is not visible in this header)
  * @return bool; presumably whether training succeeded - confirm
  */
157  virtual bool train_machine(CFeatures* data=NULL);
158 
 /** store model features; behaviour is defined in KMeans.cpp and not visible here */
160  virtual void store_model_features();
161 
 /** @return false, i.e. training does not require labels */
162  virtual bool train_require_labels() const { return false; }
163 
164  private:
 /* Private helpers below: only the signatures are visible in this header;
  * parameter semantics must be confirmed against KMeans.cpp. */
165  void init();
166  void set_random_centers(float64_t* weights_set, int32_t* ClList, int32_t XSize);
 /* Private overload of set_initial_centers(), distinct from the public
  * one-argument version above. */
167  void set_initial_centers(CDenseFeatures<float64_t>* rhs_mus, float64_t* weights_set,
168  float64_t* dists, int32_t* ClList, int32_t XSize);
169  void compute_cluster_variances();
170 
171  protected:
 /** maximum number of iterations */
173  int32_t max_iter;
174 
177 
 /** the k parameter in KMeans */
179  int32_t k;
180 
 /** number of dimensions */
182  int32_t dimensions;
183 
186 
189 
190  private:
 /* NOTE(review): the member described by the comment below does not appear in this listing. */
191  /* temp variable for cluster centers */
193 };
194 }
195 #endif
196 
virtual const char * get_name() const
Definition: KMeans.h:140
EMachineType
Definition: Machine.h:33
virtual bool save(FILE *dstfile)
Definition: KMeans.cpp:383
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:80
int32_t max_iter
maximum number of iterations
Definition: KMeans.h:173
int32_t dimensions
number of dimensions
Definition: KMeans.h:182
SGVector< float64_t > R
radii of the clusters (size k)
Definition: KMeans.h:185
void set_k(int32_t p_k)
Definition: KMeans.cpp:391
int32_t get_dimensions()
Definition: KMeans.cpp:430
A generic DistanceMachine interface.
virtual ~CKMeans()
Definition: KMeans.cpp:50
virtual bool train_require_labels() const
Definition: KMeans.h:162
SGVector< float64_t > get_radiuses()
Definition: KMeans.cpp:413
KMeans clustering, partitions the data into k (a-priori specified) clusters.
Definition: KMeans.h:41
#define MACHINE_PROBLEM_TYPE(PT)
Definition: Machine.h:115
float64_t get_max_iter()
Definition: KMeans.cpp:408
double float64_t
Definition: common.h:48
virtual bool load(FILE *srcfile)
Definition: KMeans.cpp:376
bool get_fixed_centers()
Definition: KMeans.cpp:440
bool fixed_centers
whether to keep cluster centers fixed or not
Definition: KMeans.h:176
void set_max_iter(int32_t iter)
Definition: KMeans.cpp:402
void set_fixed_centers(bool fixed)
Definition: KMeans.cpp:435
SGMatrix< float64_t > mus_initial
initial centers supplied
Definition: KMeans.h:188
virtual void set_initial_centers(SGMatrix< float64_t > centers)
Definition: KMeans.cpp:54
virtual EMachineType get_classifier_type()
Definition: KMeans.h:69
The class Features is the base class of all feature objects.
Definition: Features.h:62
virtual void store_model_features()
Definition: KMeans.cpp:445
virtual bool train_machine(CFeatures *data=NULL)
Definition: KMeans.cpp:211
int32_t get_k()
Definition: KMeans.cpp:397
int32_t k
the k parameter in KMeans
Definition: KMeans.h:179
SGMatrix< float64_t > get_cluster_centers()
Definition: KMeans.cpp:418

SHOGUN Machine Learning Toolbox - Documentation