diff --git a/Makefile b/Makefile index 8a9dec7..4bbc9d6 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,7 @@ DATA_built = $(RELEASE_SQL) \ DOCS = README.pg_sphere COPYRIGHT.pg_sphere TESTS = version tables points euler circle line ellipse poly path box \ index contains_ops contains_ops_compat bounding_box_gist gnomo \ - epochprop contains overlaps spoint_brin sbox_brin selectivity + epochprop contains overlaps spoint_brin sbox_brin selectivity knn REGRESS = init $(TESTS) PG_CFLAGS += -DPGSPHERE_VERSION=$(PGSPHERE_VERSION) diff --git a/doc/indices.sgm b/doc/indices.sgm index 998b546..c27ef5e 100644 --- a/doc/indices.sgm +++ b/doc/indices.sgm @@ -68,6 +68,11 @@ + + GiST index can be used also for fast finding points closest to the given one + when ordering by an expression with the <-> operator + is used, as shown in an example below. + BRIN indexing supports just spherical points (spoint) and spherical coordinates range (sbox) at the moment. @@ -82,6 +87,13 @@ + + To find points closest to a given spherical position, use the <-> operator: + + + spoint (0.2, 0.3) LIMIT 10 ]]> + + BRIN index can be created through the following syntax: @@ -100,7 +112,6 @@ - diff --git a/expected/knn.out b/expected/knn.out new file mode 100644 index 0000000..49c8903 --- /dev/null +++ b/expected/knn.out @@ -0,0 +1,122 @@ +CREATE TABLE points (id int, p spoint, pos int); +INSERT INTO points (id, p) SELECT x, spoint(random()*6.28, (2*random()-1)*1.57) FROM generate_series(1,314159) x; +CREATE INDEX i ON points USING gist (p); +SET enable_indexscan = true; +EXPLAIN (costs off) SELECT p <-> spoint (0.2, 0.3) FROM points ORDER BY 1 LIMIT 100; + QUERY PLAN +------------------------------------------------- + Limit + -> Index Scan using i on points + Order By: (p <-> '(0.2 , 0.3)'::spoint) +(3 rows) + +UPDATE points SET pos = n FROM + (SELECT id, row_number() OVER (ORDER BY p <-> spoint (0.2, 0.3)) n FROM points ORDER BY p <-> spoint (0.2, 0.3) LIMIT 100) sel + WHERE points.id = sel.id; +SET enable_indexscan = false; +SELECT pos, row_number() OVER (ORDER BY p <-> spoint (0.2, 0.3)) n FROM points ORDER BY p <-> spoint (0.2, 0.3) LIMIT 100; + pos | n +-----+----- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 + 31 | 31 + 32 | 32 + 33 | 33 + 34 | 34 + 35 | 35 + 36 | 36 + 37 | 37 + 38 | 38 + 39 | 39 + 40 | 40 + 41 | 41 + 42 | 42 + 43 | 43 + 44 | 44 + 45 | 45 + 46 | 46 + 47 | 47 + 48 | 48 + 49 | 49 + 50 | 50 + 51 | 51 + 52 | 52 + 53 | 53 + 54 | 54 + 55 | 55 + 56 | 56 + 57 | 57 + 58 | 58 + 59 | 59 + 60 | 60 + 61 | 61 + 62 | 62 + 63 | 63 + 64 | 64 + 65 | 65 + 66 | 66 + 67 | 67 + 68 | 68 + 69 | 69 + 70 | 70 + 71 | 71 + 72 | 72 + 73 | 73 + 74 | 74 + 75 | 75 + 76 | 76 + 77 | 77 + 78 | 78 + 79 | 79 + 80 | 80 + 81 | 81 + 82 | 82 + 83 | 83 + 84 | 84 + 85 | 85 + 86 | 86 + 87 | 87 + 88 | 88 + 89 | 89 + 90 | 90 + 91 | 91 + 92 | 92 + 93 | 93 + 94 | 94 + 95 | 95 + 96 | 96 + 97 | 97 + 98 | 98 + 99 | 99 + 100 | 100 +(100 rows) + +DROP TABLE points; diff --git a/pgs_gist.sql.in b/pgs_gist.sql.in index e8ac119..3ac9971 100644 --- a/pgs_gist.sql.in +++ b/pgs_gist.sql.in @@ -98,12 +98,15 @@ CREATE FUNCTION g_spoint_compress(internal) AS 'MODULE_PATHNAME', 'g_spoint_compress' LANGUAGE 'c'; - CREATE FUNCTION g_spoint_consistent(internal, internal, int4, oid, internal) RETURNS internal AS 'MODULE_PATHNAME', 'g_spoint_consistent' LANGUAGE 'c'; +CREATE FUNCTION g_spoint_distance(internal, spoint, smallint, oid, internal) + RETURNS internal + AS 'MODULE_PATHNAME', 'g_spoint_distance' + LANGUAGE 'c'; CREATE OPERATOR CLASS spoint DEFAULT FOR TYPE spoint USING gist AS @@ -114,6 +117,7 @@ CREATE OPERATOR CLASS spoint OPERATOR 14 @ (spoint, spoly), OPERATOR 15 @ (spoint, sellipse), OPERATOR 16 @ (spoint, sbox), + OPERATOR 17 <-> (spoint, spoint) FOR ORDER BY float_ops, OPERATOR 37 <@ (spoint, scircle), OPERATOR 38 <@ (spoint, sline), OPERATOR 39 <@ (spoint, spath), @@ -127,6 +131,7 @@ CREATE OPERATOR CLASS spoint FUNCTION 5 g_spherekey_penalty (internal, internal, internal), FUNCTION 6 g_spherekey_picksplit (internal, internal), FUNCTION 7 g_spherekey_same (spherekey, spherekey, internal), + FUNCTION 8 g_spoint_distance (internal, spoint, smallint, oid, internal), STORAGE spherekey; diff --git a/sql/knn.sql b/sql/knn.sql new file mode 100644 index 0000000..0207c98 --- /dev/null +++ b/sql/knn.sql @@ -0,0 +1,12 @@ +CREATE TABLE points (id int, p spoint, pos int); +INSERT INTO points (id, p) SELECT x, spoint(random()*6.28, (2*random()-1)*1.57) FROM generate_series(1,314159) x; +CREATE INDEX i ON points USING gist (p); +SET enable_indexscan = true; +EXPLAIN (costs off) SELECT p <-> spoint (0.2, 0.3) FROM points ORDER BY 1 LIMIT 100; +UPDATE points SET pos = n FROM + (SELECT id, row_number() OVER (ORDER BY p <-> spoint (0.2, 0.3)) n FROM points ORDER BY p <-> spoint (0.2, 0.3) LIMIT 100) sel + WHERE points.id = sel.id; +SET enable_indexscan = false; +SELECT pos, row_number() OVER (ORDER BY p <-> spoint (0.2, 0.3)) n FROM points ORDER BY p <-> spoint (0.2, 0.3) LIMIT 100; +DROP TABLE points; + diff --git a/src/gist.c b/src/gist.c index b3223a5..f373448 100644 --- a/src/gist.c +++ b/src/gist.c @@ -36,6 +36,7 @@ PG_FUNCTION_INFO_V1(g_spherekey_penalty); PG_FUNCTION_INFO_V1(g_spherekey_picksplit); PG_FUNCTION_INFO_V1(g_spoint3_penalty); PG_FUNCTION_INFO_V1(g_spoint3_picksplit); +PG_FUNCTION_INFO_V1(g_spoint_distance); PG_FUNCTION_INFO_V1(g_spoint3_distance); PG_FUNCTION_INFO_V1(g_spoint3_fetch); @@ -681,6 +682,10 @@ g_spoint3_consistent(PG_FUNCTION_ARGS) PG_RETURN_BOOL(false); } +static double distance_vector_point_3d (Vector3D* v, double x, double y, double z) { + return acos ( (v->x * x + v->y * y + v->z * z) / sqrt( x*x + y*y + z*z ) ); // as v has length=1 by design +} + Datum g_spoint3_distance(PG_FUNCTION_ARGS) { @@ -1672,6 +1677,127 @@ fallbackSplit(Box3D *boxes, OffsetNumber maxoff, GIST_SPLITVEC *v) v->spl_ldatum_exists = v->spl_rdatum_exists = false; } + +Datum +g_spoint_distance(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + Box3D* box = (Box3D *) DatumGetPointer(entry->key); + double retval; + SPoint *point = (SPoint *) PG_GETARG_POINTER(1); + Vector3D v_point, v_low, v_high; + + switch (strategy) + { + case 17: + // Prepare data for calculation + spoint_vector3d(&v_point, point); + v_low.x = (double)box->low.coord[0] / MAXCVALUE; + v_low.y = (double)box->low.coord[1] / MAXCVALUE; + v_low.z = (double)box->low.coord[2] / MAXCVALUE; + v_high.x = (double)box->high.coord[0] / MAXCVALUE; + v_high.y = (double)box->high.coord[1] / MAXCVALUE; + v_high.z = (double)box->high.coord[2] / MAXCVALUE; + // a box splits space into 27 subspaces (6+12+8+1) with different distance calculation + if(v_point.x < v_low.x) { + if(v_point.y < v_low.y) { + if(v_point.z < v_low.z) { + retval = distance_vector_point_3d (&v_point, v_low.x, v_low.y, v_low.z); //point2point distance + } else if (v_point.z < v_high.z) { + retval = distance_vector_point_3d (&v_point, v_low.x, v_low.y, v_point.z); //point2line distance + } else { + retval = distance_vector_point_3d (&v_point, v_low.x, v_low.y, v_high.z); //point2point distance + } + } else if(v_point.y < v_high.y) { + if(v_point.z < v_low.z) { + retval = distance_vector_point_3d (&v_point, v_low.x, v_point.y , v_low.z); //point2line distance + } else if (v_point.z < v_high.z) { + retval = distance_vector_point_3d (&v_point, v_low.x, v_point.y , v_point.z); //point2plane distance + } else { + retval = distance_vector_point_3d (&v_point, v_low.x, v_point.y, v_high.z); //point2line distance + } + } else { + if(v_point.z < v_low.z) { + retval = distance_vector_point_3d (&v_point, v_low.x, v_high.y, v_low.z); //point2point distance + } else if (v_point.z < v_high.z) { + retval = distance_vector_point_3d (&v_point, v_low.x, v_high.y, v_point.z); //point2line distance + } else { + retval = distance_vector_point_3d (&v_point, v_low.x, v_high.y, v_high.z); //point2point distance + } + } + } else if(v_point.x < v_high.x) { + if(v_point.y < v_low.y) { + if(v_point.z < v_low.z) { + retval = distance_vector_point_3d (&v_point, v_point.x, v_low.y, v_low.z); //p2line distance + } else if (v_point.z < v_high.z) { + retval = distance_vector_point_3d (&v_point, v_point.x, v_low.y, v_point.z); //point2plane distance + } else { + retval = distance_vector_point_3d (&v_point, v_point.x, v_low.y, v_high.z); //point2line distance + } + } else if(v_point.y < v_high.y) { + if(v_point.z < v_low.z) { + retval = distance_vector_point_3d (&v_point, v_point.x, v_point.y , v_low.z); //point2plane distance + } else if (v_point.z < v_high.z) { + retval = 0; // inside cube + } else { + retval = distance_vector_point_3d (&v_point, v_point.x, v_point.y, v_high.z); //point2plane distance + } + } else { + if(v_point.z < v_low.z) { + retval = distance_vector_point_3d (&v_point, v_point.x, v_high.y, v_low.z); //point2line distance + } else if (v_point.z < v_high.z) { + retval = distance_vector_point_3d (&v_point, v_point.x, v_high.y, v_point.z); //point2plane distance + } else { + retval = distance_vector_point_3d (&v_point, v_point.x, v_high.y, v_high.z); //point2line distance + } + } + } else { + if(v_point.y < v_low.y) { + if(v_point.z < v_low.z) { + retval = distance_vector_point_3d (&v_point, v_high.x, v_low.y, v_low.z); //p2p distance + } else if (v_point.z < v_high.z) { + retval = distance_vector_point_3d (&v_point, v_high.x, v_low.y, v_point.z); //point2line distance + } else { + retval = distance_vector_point_3d (&v_point, v_high.x, v_low.y, v_high.z); //point2point distance + } + } else if(v_point.y < v_high.y) { + if(v_point.z < v_low.z) { + retval = distance_vector_point_3d (&v_point, v_high.x, v_point.y , v_low.z); //point2line distance + } else if (v_point.z < v_high.z) { + retval = distance_vector_point_3d (&v_point, v_high.x, v_point.y , v_point.z); //point2plane distance + } else { + retval = distance_vector_point_3d (&v_point, v_high.x, v_point.y, v_high.z); //point2line distance + } + } else { + if(v_point.z < v_low.z) { + retval = distance_vector_point_3d (&v_point, v_high.x, v_high.y, v_low.z); //point2point distance + } else if (v_point.z < v_high.z) { + retval = distance_vector_point_3d (&v_point, v_high.x, v_high.y, v_point.z); //point2line distance + } else { + retval = distance_vector_point_3d (&v_point, v_high.x, v_high.y, v_high.z); //point2point distance + } + } + } + + elog(DEBUG1, "distance (%lg,%lg,%lg %lg,%lg,%lg) <-> (%lg,%lg) = %lg", + v_low.x, v_low.y, v_low.z, + v_high.x, v_high.y, v_high.z, + point->lng, point->lat, + retval + ); + break; + + default: + elog(ERROR, "unrecognized cube strategy number: %d", strategy); + retval = 0; /* keep compiler quiet */ + break; + } + PG_RETURN_FLOAT8(retval); +} + + + /* * Represents information about an entry that can be placed to either group * without affecting overlap over selected axis ("common entry").