000001 hash-threshold 8 000002 000003 statement ok 000004 CREATE TABLE t1( x INTEGER, y VARCHAR(8) ) 000005 000006 statement ok 000007 INSERT INTO t1 VALUES(1,'true') 000008 000009 statement ok 000010 INSERT INTO t1 VALUES(0,'false') 000011 000012 statement ok 000013 INSERT INTO t1 VALUES(NULL,'NULL') 000014 000015 statement ok 000016 CREATE INDEX t1i1 ON t1(x) 000017 000018 skipif sqlite 000019 halt 000020 000021 # count(x), avg(x), sum(x), total(x), min(x), max(x) 000022 # group_concat(x), group_concat(x,y) 000023 000024 # EVIDENCE-OF: R-00466-56349 In any aggregate function that takes a 000025 # single argument, that argument can be preceded by the keyword 000026 # DISTINCT. 000027 000028 query I nosort 000029 SELECT count(DISTINCT x) FROM t1 000030 ---- 000031 2 000032 000033 query I nosort 000034 SELECT avg(DISTINCT x) FROM t1 000035 ---- 000036 0 000037 000038 query I nosort 000039 SELECT sum(DISTINCT x) FROM t1 000040 ---- 000041 1 000042 000043 query I nosort 000044 SELECT total(DISTINCT x) FROM t1 000045 ---- 000046 1 000047 000048 query I nosort 000049 SELECT min(DISTINCT x) FROM t1 000050 ---- 000051 0 000052 000053 query I nosort 000054 SELECT max(DISTINCT x) FROM t1 000055 ---- 000056 1 000057 000058 query T nosort 000059 SELECT group_concat(DISTINCT x) FROM t1 NOT INDEXED 000060 ---- 000061 1,0 000062 000063 000064 # EVIDENCE-OF: R-00171-59428 In such cases, duplicate elements are 000065 # filtered before being passed into the aggregate function. 000066 000067 # EVIDENCE-OF: R-31453-41389 For example, the function "count(distinct 000068 # X)" will return the number of distinct values of column X instead of 000069 # the total number of non-null values in column X. 000070 000071 statement ok 000072 INSERT INTO t1 VALUES(2,'true') 000073 000074 statement ok 000075 INSERT INTO t1 VALUES(2,'true') 000076 000077 query I nosort 000078 SELECT count(DISTINCT x) FROM t1 000079 ---- 000080 3 000081 000082 000083 # EVIDENCE-OF: R-20409-33051 The avg() function returns the average 000084 # value of all non-NULL X within a group. 000085 000086 query I nosort 000087 SELECT avg(x) FROM t1 000088 ---- 000089 1 000090 000091 000092 # EVIDENCE-OF: R-29052-00975 String and BLOB values that do not look 000093 # like numbers are interpreted as 0. 000094 000095 query I nosort 000096 SELECT count(y) FROM t1 000097 ---- 000098 5 000099 000100 query I nosort 000101 SELECT avg(y) FROM t1 000102 ---- 000103 0 000104 000105 query I nosort 000106 SELECT sum(y) FROM t1 000107 ---- 000108 0 000109 000110 query I nosort 000111 SELECT total(y) FROM t1 000112 ---- 000113 0 000114 000115 query I nosort 000116 SELECT min(y) FROM t1 000117 ---- 000118 0 000119 000120 query I nosort 000121 SELECT max(y) FROM t1 000122 ---- 000123 0 000124 000125 query T nosort 000126 SELECT group_concat(y) FROM t1 000127 ---- 000128 true,false,NULL,true,true 000129 000130 # repeat with DISTINCT 000131 000132 query I nosort 000133 SELECT count(DISTINCT y) FROM t1 000134 ---- 000135 3 000136 000137 query I nosort 000138 SELECT avg(DISTINCT y) FROM t1 000139 ---- 000140 0 000141 000142 query I nosort 000143 SELECT sum(DISTINCT y) FROM t1 000144 ---- 000145 0 000146 000147 query I nosort 000148 SELECT total(DISTINCT y) FROM t1 000149 ---- 000150 0 000151 000152 query I nosort 000153 SELECT min(DISTINCT y) FROM t1 000154 ---- 000155 0 000156 000157 query I nosort 000158 SELECT max(DISTINCT y) FROM t1 000159 ---- 000160 0 000161 000162 query T nosort 000163 SELECT group_concat(DISTINCT y) FROM t1 000164 ---- 000165 true,false,NULL 000166 000167 000168 # EVIDENCE-OF: R-07535-34995 The result of avg() is always a floating 000169 # point value as long as at there is at least one non-NULL input even if 000170 # all inputs are integers. 000171 000172 query R nosort 000173 SELECT avg(x) FROM t1 000174 ---- 000175 1.250 000176 000177 query R nosort 000178 SELECT avg(DISTINCT x) FROM t1 000179 ---- 000180 1.000 000181 000182 000183 # EVIDENCE-OF: R-40597-22164 The result of avg() is NULL if and only if 000184 # there are no non-NULL inputs. 000185 000186 query I nosort label-NULL 000187 SELECT avg(x) FROM t1 WHERE y='null' 000188 ---- 000189 NULL 000190 000191 query I nosort label-NULL 000192 SELECT avg(DISTINCT x) FROM t1 WHERE y='null' 000193 ---- 000194 NULL 000195 000196 000197 # EVIDENCE-OF: R-34280-42283 The count(X) function returns a count of 000198 # the number of times that X is not NULL in a group. 000199 000200 query I nosort 000201 SELECT count(x) FROM t1 WHERE y='null' 000202 ---- 000203 0 000204 000205 query I nosort 000206 SELECT count(DISTINCT x) FROM t1 WHERE y='null' 000207 ---- 000208 0 000209 000210 query I nosort 000211 SELECT count(x) FROM t1 WHERE y='false' 000212 ---- 000213 1 000214 000215 query I nosort 000216 SELECT count(DISTINCT x) FROM t1 WHERE y='false' 000217 ---- 000218 1 000219 000220 000221 # EVIDENCE-OF: R-13776-21310 The count(*) function (with no arguments) 000222 # returns the total number of rows in the group. 000223 000224 query I nosort 000225 SELECT count(*) FROM t1 WHERE y='false' 000226 ---- 000227 1 000228 000229 # TBD: can DISTINCT be used with *? 000230 000231 statement error 000232 SELECT count(DISTINCT *) FROM t1 WHERE y='false' 000233 000234 000235 # EVIDENCE-OF: R-56088-25150 The group_concat() function returns a 000236 # string which is the concatenation of all non-NULL values of X. 000237 000238 query T nosort 000239 SELECT group_concat(x) FROM t1 NOT INDEXED 000240 ---- 000241 1,0,2,2 000242 000243 query T nosort 000244 SELECT group_concat(DISTINCT x) FROM t1 NOT INDEXED 000245 ---- 000246 1,0,2 000247 000248 000249 # EVIDENCE-OF: R-08600-21007 If parameter Y is present then it is used 000250 # as the separator between instances of X. 000251 000252 query T nosort 000253 SELECT group_concat(x,':') FROM t1 NOT INDEXED 000254 ---- 000255 1:0:2:2 000256 000257 # TBD: DISTINCT can only be used with single parameters 000258 statement error 000259 SELECT group_concat(DISTINCT x,':') FROM t1 000260 000261 000262 # EVIDENCE-OF: R-39910-14723 A comma (",") is used as the separator if Y 000263 # is omitted. 000264 000265 query T nosort 000266 SELECT group_concat(x) FROM t1 NOT INDEXED 000267 ---- 000268 1,0,2,2 000269 000270 query T nosort 000271 SELECT group_concat(DISTINCT x) FROM t1 NOT INDEXED 000272 ---- 000273 1,0,2 000274 000275 000276 # EVIDENCE-OF: R-52585-35928 The max() aggregate function returns the 000277 # maximum value of all values in the group. 000278 000279 query I nosort 000280 SELECT max(x) FROM t1 000281 ---- 000282 2 000283 000284 query I nosort 000285 SELECT max(DISTINCT x) FROM t1 000286 ---- 000287 2 000288 000289 000290 # TBD: last non-NULL value 000291 # EVIDENCE-OF: R-13053-11096 The maximum value is the value that would 000292 # be returned last in an ORDER BY on the same column. 000293 000294 query I nosort 000295 SELECT x FROM t1 WHERE x NOT NULL ORDER BY x 000296 ---- 000297 0 000298 1 000299 2 000300 2 000301 000302 query I nosort 000303 SELECT DISTINCT x FROM t1 WHERE x NOT NULL ORDER BY x 000304 ---- 000305 0 000306 1 000307 2 000308 000309 000310 # EVIDENCE-OF: R-50775-16353 Aggregate max() returns NULL if and only if 000311 # there are no non-NULL values in the group. 000312 000313 query I nosort label-NULL 000314 SELECT max(x) FROM t1 WHERE y='null' 000315 ---- 000316 NULL 000317 000318 query I nosort label-NULL 000319 SELECT max(DISTINCT x) FROM t1 WHERE y='null' 000320 ---- 000321 NULL 000322 000323 000324 # EVIDENCE-OF: R-16028-39081 The min() aggregate function returns the 000325 # minimum non-NULL value of all values in the group. 000326 000327 query I nosort 000328 SELECT min(x) FROM t1 000329 ---- 000330 0 000331 000332 query I nosort 000333 SELECT min(DISTINCT x) FROM t1 000334 ---- 000335 0 000336 000337 000338 # EVIDENCE-OF: R-30311-39793 The minimum value is the first non-NULL 000339 # value that would appear in an ORDER BY of the column. 000340 000341 query I nosort 000342 SELECT x FROM t1 WHERE x NOT NULL ORDER BY x 000343 ---- 000344 0 000345 1 000346 2 000347 2 000348 000349 query I nosort 000350 SELECT DISTINCT x FROM t1 WHERE x NOT NULL ORDER BY x 000351 ---- 000352 0 000353 1 000354 2 000355 000356 000357 # EVIDENCE-OF: R-10396-30188 Aggregate min() returns NULL if and only if 000358 # there are no non-NULL values in the group. 000359 000360 query I nosort label-NULL 000361 SELECT min(x) FROM t1 WHERE y='null' 000362 ---- 000363 NULL 000364 000365 query I nosort label-NULL 000366 SELECT min(DISTINCT x) FROM t1 WHERE y='null' 000367 ---- 000368 NULL 000369 000370 000371 # EVIDENCE-OF: R-58261-63259 The sum() and total() aggregate functions 000372 # return sum of all non-NULL values in the group. 000373 000374 query I nosort label-sum 000375 SELECT sum(x) FROM t1 000376 ---- 000377 5 000378 000379 query I nosort label-sum 000380 SELECT total(x) FROM t1 000381 ---- 000382 5 000383 000384 query I nosort label-sum-distinct 000385 SELECT sum(DISTINCT x) FROM t1 000386 ---- 000387 3 000388 000389 query I nosort label-sum-distinct 000390 SELECT total(DISTINCT x) FROM t1 000391 ---- 000392 3 000393 000394 000395 # EVIDENCE-OF: R-44223-43966 If there are no non-NULL input rows then 000396 # sum() returns NULL but total() returns 0. 000397 000398 query I nosort label-NULL 000399 SELECT sum(x) FROM t1 WHERE y='null' 000400 ---- 000401 NULL 000402 000403 query I nosort label-NULL 000404 SELECT sum(DISTINCT x) FROM t1 WHERE y='null' 000405 ---- 000406 NULL 000407 000408 query I nosort label-zero 000409 SELECT total(x) FROM t1 WHERE y='null' 000410 ---- 000411 0 000412 000413 query I nosort label-zero 000414 SELECT total(DISTINCT x) FROM t1 WHERE y='null' 000415 ---- 000416 0 000417 000418 000419 # EVIDENCE-OF: R-07734-01023 The result of total() is always a floating 000420 # point value. 000421 000422 query R nosort 000423 SELECT total(x) FROM t1 000424 ---- 000425 5.000 000426 000427 query R nosort 000428 SELECT total(DISTINCT x) FROM t1 000429 ---- 000430 3.000 000431 000432 000433 # EVIDENCE-OF: R-19660-56479 The result of sum() is an integer value if 000434 # all non-NULL inputs are integers. 000435 000436 query I nosort label-sum 000437 SELECT sum(x) FROM t1 000438 ---- 000439 5 000440 000441 query I nosort label-sum-distinct 000442 SELECT sum(DISTINCT x) FROM t1 000443 ---- 000444 3 000445 000446 000447 # EVIDENCE-OF: R-47496-23053 If any input to sum() is neither an integer 000448 # or a NULL then sum() returns a floating point value which might be an 000449 # approximation to the true sum. 000450 000451 statement ok 000452 INSERT INTO t1 VALUES(4.0,'true') 000453 000454 query R nosort 000455 SELECT sum(x) FROM t1 000456 ---- 000457 9.000 000458 000459 query R nosort 000460 SELECT sum(DISTINCT x) FROM t1 000461 ---- 000462 7.000 000463 000464 000465 # TBD-EVIDENCE-OF: R-08904-24719 Sum() will throw an "integer overflow" 000466 # exception if all inputs are integers or NULL and an integer overflow 000467 # occurs at any point during the computation. 000468 000469 statement ok 000470 INSERT INTO t1 VALUES(1<<63,'true'); 000471 000472 statement ok 000473 INSERT INTO t1 VALUES(1<<63,'true'); 000474 000475 statement ok 000476 INSERT INTO t1 VALUES(-1,'true'); 000477 000478 query R nosort 000479 SELECT sum(x) FROM t1 000480 ---- 000481 000482 query R nosort 000483 SELECT sum(DISTINCT x) FROM t1 000484 ---- 000485 000486 000487 # TBD-EVIDENCE-OF: R-19553-64528 Total() never throws an integer overflow. 000488 000489 query R nosort 000490 SELECT total(x) FROM t1 000491 ---- 000492 000493 query R nosort 000494 SELECT total(DISTINCT x) FROM t1 000495 ----