Statistics for MySQL  1.1
 All Classes Files Functions Variables Typedefs Macros Pages
corr.c
Go to the documentation of this file.
1 /* corr.c (Pearson correlation coefficient) */
2 
3 /***********************************************************************
4 * This code is part of Statistics for MySQL.
5 *
6 * Copyright (C) 2011 Heinrich Schuchardt (xypron.glpk@gmx.de)
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 ***********************************************************************/
20 
32 #include "sqlstat.h"
33 
/**
 * Storage for corr function.
 *
 * One instance is allocated in corr_init and carried in initid->ptr.
 * corr_add accumulates into it and corr evaluates it at the end of a group.
 */
struct corr_storage {
    int argc;      /**< Number of arguments (2, or 3 when a weight is passed). */
    double count;  /**< Counter; corr_add adds the weight (1 if no weight argument). */
    double sumX;   /**< Sum of w * x. */
    double sumXX;  /**< Sum of w * x^2. */
    double sumXY;  /**< Sum of w * x * y. */
    double sumY;   /**< Sum of w * y. */
    double sumYY;  /**< Sum of w * y^2. */
};
46 
59 my_bool corr_init(UDF_INIT *initid, UDF_ARGS *args, char *message) {
60  struct corr_storage * data;
61 
62  if (args->arg_count < 2 || args->arg_count > 3) {
63  strcpy(message,"corr() requires two or three arguments");
64  return 1;
65  }
66  args->arg_type[0] = REAL_RESULT;
67  args->arg_type[1] = REAL_RESULT;
68  if (args->arg_count > 2) {
69  args->arg_type[2] = REAL_RESULT;
70  }
71 
72  data = (struct corr_storage *) malloc( sizeof(struct corr_storage));
73  if (data == NULL) {
74  strcpy(message,"Couldn't allocate memory");
75  return 1;
76  }
77  data->argc = args->arg_count;
78 
79  initid->maybe_null = 1;
80  initid->decimals = NOT_FIXED_DEC;
81  initid->max_length = 13 + initid->decimals;
82  initid->ptr = (char *) data;
83  initid->const_item = 0;
84 
85  return 0;
86 }
87 
99 void corr_reset(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) {
100  corr_clear(initid, is_null, error);
101  corr_add(initid, args, is_null, error);
102 }
103 
113 void corr_clear(UDF_INIT *initid, char *is_null, char *error) {
114  struct corr_storage *data;
115  data = (struct corr_storage *) initid->ptr;
116  data->count = 0;
117  data->sumX = 0;
118  data->sumXX = 0;
119  data->sumXY = 0;
120  data->sumY = 0;
121  data->sumYY = 0;
122 }
123 
134 void corr_add(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) {
135  struct corr_storage * data;
136  double w;
137  double x;
138  double y;
139 
140  if (!args->args[0] || !args->args[1]) {
141  return;
142  }
143  data = (struct corr_storage *) initid->ptr;
144  if (data->argc > 2) {
145  if (!args->args[2]) {
146  return;
147  }
148  w = *((double*) args->args[2]);
149  } else {
150  w = 1.;
151  }
152  x = *((double*) args->args[0]);
153  y = *((double*) args->args[1]);
154 
155  data->count += w;
156  data->sumX += w * x;
157  data->sumXX += w * x * x;
158  data->sumXY += w * x * y;
159  data->sumY += w * y;
160  data->sumYY += w * y * y;
161 }
162 
172 double corr(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error) {
173  struct corr_storage * data;
174  double ret;
175  double nx;
176  double ny;
177 
178  data = (struct corr_storage *) initid->ptr;
179 
180  nx = data->count * data->sumXX - data->sumX * data->sumX;
181  ny = data->count * data->sumYY - data->sumY * data->sumY;
182 
183  if (nx <= 0 || ny <= 0) {
184  *is_null = 1;
185  return 0;
186  }
187 
188  ret = (data->count * data->sumXY - data->sumX * data->sumY)
189  / sqrt(nx * ny);
190 
191  return ret;
192 }
193 
201 void corr_deinit(UDF_INIT *initid) {
202  if (initid->ptr) {
203  free(initid->ptr);
204  }
205 }
Definition of functions for UDFs and plugins.
my_bool corr_init(UDF_INIT *initid, UDF_ARGS *args, char *message)
Called before first usage of function.
Definition: corr.c:59
double sumX
Sum of x.
Definition: corr.c:40
double sumXY
Sum of x * y.
Definition: corr.c:42
double corr(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error)
Retrieve correlation coefficient. Called at end of group.
Definition: corr.c:172
double sumXX
Sum of x^2.
Definition: corr.c:41
double sumYY
Sum of y^2.
Definition: corr.c:44
#define NOT_FIXED_DEC
Maximum number of digits in double. As defined in mysql/sql_string.h.
Definition: sqlstat.h:77
Storage for corr function.
Definition: corr.c:37
void corr_add(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error)
Add a member of the group.
Definition: corr.c:134
double sumY
Sum of y.
Definition: corr.c:43
double count
Counter.
Definition: corr.c:39
void corr_reset(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error)
Reset function and add first group member. Calls clear and add.
Definition: corr.c:99
int argc
number of arguments
Definition: corr.c:38
void corr_deinit(UDF_INIT *initid)
Called after last access to function.
Definition: corr.c:201
void corr_clear(UDF_INIT *initid, char *is_null, char *error)
Called at start of group.
Definition: corr.c:113