Statistics for MySQL  1.1
 All Classes Files Functions Variables Typedefs Macros Pages
gini.cc
Go to the documentation of this file.
1 /* gini.cc (Gini coefficient) */
2 
3 /***********************************************************************
4 * This code is part of Statistics for MySQL.
5 *
6 * Copyright (C) 2015 Heinrich Schuchardt (xypron.glpk@gmx.de)
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 ***********************************************************************/
20 
21 
31 #include "sqlstat.h"
32 #include <set>
33 
namespace sqlstat {

/**
 * @brief One member of a group.
 *
 * gini_add() fills x from the first UDF argument and w from the second
 * one (or 1 when no weight is available); gini() uses w as the weight
 * of x.
 */
struct Item {
    /** value */
    double x;
    /** weight of the value */
    double w;
};

/**
 * @brief Orders items by ascending value.
 *
 * Only x takes part in the comparison; the weight w is ignored, so items
 * with equal x are equivalent for the container.
 */
class ItemComperator {
public:
    /**
     * @brief Compares two items.
     * @param left_item left item
     * @param right_item right item
     * @return true if the value of left_item is less than the value of
     *         right_item
     */
    bool operator() (const Item& left_item, const Item& right_item) const {
        return left_item.x < right_item.x;
    }
};

/** Items ordered by value; a multiset keeps items with equal values. */
typedef std::multiset<Item, ItemComperator> ItemSet;

/**
 * @brief State shared by the gini UDF callbacks via UDF_INIT::ptr.
 */
struct gini_storage {
    /** number of arguments */
    int argc;
    /** collected data */
    ItemSet data;
};

}
63 
76 my_bool gini_init(UDF_INIT *initid, UDF_ARGS *args, char *message) {
78 
79  if (args->arg_count < 1 || args->arg_count > 2) {
80  strcpy(message,"gini(x [,w]) requires one or two arguments");
81  return 1;
82  }
83  args->arg_type[0] = REAL_RESULT;
84  if (args->arg_count > 1) {
85  args->arg_type[1] = REAL_RESULT;
86  }
87 
88  data = new sqlstat::gini_storage();
89  if (data == NULL) {
90  strcpy(message,"Couldn't allocate memory");
91  return 1;
92  }
93  data->argc = args->arg_count;
94 
95  initid->maybe_null = 1;
96  initid->decimals = NOT_FIXED_DEC;
97  initid->max_length = 13 + initid->decimals;
98  initid->ptr = (char *) data;
99  initid->const_item = 0;
100 
101  return 0;
102 }
103 
115 void gini_reset(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) {
116  gini_clear(initid, is_null, error);
117  gini_add(initid, args, is_null, error);
118 }
119 
129 void gini_clear(UDF_INIT *initid, char *is_null, char *error) {
130  struct sqlstat::gini_storage *data;
131 
132  data = (struct sqlstat::gini_storage *) initid->ptr;
133  data->data.clear();
134 }
135 
146 void gini_add(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) {
147  struct sqlstat::gini_storage *data;
148  sqlstat::Item i;
149 
150  if (!args->args[0]) {
151  return;
152  }
153  data = (struct sqlstat::gini_storage *) initid->ptr;
154  i.x = *((double*) args->args[0]);
155  if (data->argc > 1 && args->args[1]) {
156  i.w = *((double*) args->args[1]);
157  } else {
158  i.w = 1;
159  }
160  try {
161  data->data.insert(i);
162  } catch (std::bad_alloc e) {
163  *error = 1;
164  }
165 }
166 
176 double gini(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) {
177  struct sqlstat::gini_storage *data;
178  sqlstat::ItemSet::iterator pos;
179 
180  double sumx = 0;
181  double sumw = 0;
182  double ret = 0;
183 
184  data = (struct sqlstat::gini_storage *) initid->ptr;
185 
186  for(pos = data->data.begin(); pos != data->data.end(); ++pos) {
187  double w = (*pos).w;
188  double x = w * (*pos).x;
189  ret += w * (2. * sumx + x);
190  sumx += x;
191  sumw += w;
192  }
193 
194  if (sumx == 0 || sumw == 0) {
195  *is_null = 1;
196  return 0;
197  }
198 
199  return 1. - ret / (sumx * sumw);
200 }
201 
209 void gini_deinit(UDF_INIT *initid) {
210  struct sqlstat::gini_storage *data;
211  if (initid->ptr) {
212  data = (struct sqlstat::gini_storage *) initid->ptr;
213  delete data;
214  }
215 }
Definition of functions for UDFs and plugins.
double gini(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error)
Retrieve gini. Called at end of group.
Definition: gini.cc:176
my_bool gini_init(UDF_INIT *initid, UDF_ARGS *args, char *message)
Called before first usage of function.
Definition: gini.cc:76
int argc
number of arguments
Definition: gini.cc:58
bool operator()(const Item &left_item, const Item &right_item) const
Definition: gini.cc:50
void gini_reset(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error)
Reset function and add first group member. Calls clear and add.
Definition: gini.cc:115
#define NOT_FIXED_DEC
Maximum number of digits in double. As defined in mysql/sql_string.h.
Definition: sqlstat.h:77
void gini_add(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error)
Add a member of the group.
Definition: gini.cc:146
ItemSet data
collected data
Definition: gini.cc:59
void gini_deinit(UDF_INIT *initid)
Called after last access to function.
Definition: gini.cc:209
void gini_clear(UDF_INIT *initid, char *is_null, char *error)
Called at start of group.
Definition: gini.cc:129
Definition: gini.cc:34