GEOSEARCH bybox bug fixes and new fuzzy tester (#8445)

Fix errors of GEOSEARCH bybox search due to:
1. projection of the box to a trapezoid (when the meter box is converted to long / lat it's no longer a box).
2. width and height mismatch

Changes:
- New GEOSEARCH point in rectangle algorithm
- Fix GEOSEARCH bybox width and height mismatch bug
- Add GEOSEARCH bybox testing to the existing "GEOADD + GEORANGE randomized test"
- Add new fuzzy test to stress test the bybox corners and edges
- Add some tests for edge cases of the bybox algorithm

Co-authored-by: Oran Agra <oran@redislabs.com>
This commit is contained in:
Yang Bodong 2021-02-05 00:08:35 +08:00 committed by GitHub
parent 52fb306535
commit ded1655d49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 225 additions and 60 deletions

View File

@ -175,10 +175,10 @@ int extractDistanceOrReply(client *c, robj **argv,
* that should be in the form: <number> <number> <unit>, and return C_OK or C_ERR means success or failure
* *conversions is populated with the coefficient to use in order to convert meters to the unit.*/
int extractBoxOrReply(client *c, robj **argv, double *conversion,
double *height, double *width) {
double *width, double *height) {
double h, w;
if ((getDoubleFromObjectOrReply(c, argv[0], &h, "need numeric height") != C_OK) ||
(getDoubleFromObjectOrReply(c, argv[1], &w, "need numeric width") != C_OK)) {
if ((getDoubleFromObjectOrReply(c, argv[0], &w, "need numeric width") != C_OK) ||
(getDoubleFromObjectOrReply(c, argv[1], &h, "need numeric height") != C_OK)) {
return C_ERR;
}
@ -224,8 +224,10 @@ int geoAppendIfWithinShape(geoArray *ga, GeoShape *shape, double score, sds memb
if (!geohashGetDistanceIfInRadiusWGS84(shape->xy[0], shape->xy[1], xy[0], xy[1],
shape->t.radius*shape->conversion, &distance)) return C_ERR;
} else if (shape->type == RECTANGLE_TYPE) {
if (!geohashGetDistanceIfInRectangle(shape->bounds, shape->xy[0], shape->xy[1],
xy[0], xy[1], &distance)) return C_ERR;
if (!geohashGetDistanceIfInRectangle(shape->t.r.width * shape->conversion,
shape->t.r.height * shape->conversion,
shape->xy[0], shape->xy[1], xy[0], xy[1], &distance))
return C_ERR;
}
/* Append the new element. */
@ -635,8 +637,8 @@ void georadiusGeneric(client *c, int srcKeyIndex, int flags) {
flags & GEOSEARCH &&
!byradius)
{
if (extractBoxOrReply(c, c->argv+base_args+i+1, &shape.conversion, &shape.t.r.height,
&shape.t.r.width) != C_OK) return;
if (extractBoxOrReply(c, c->argv+base_args+i+1, &shape.conversion, &shape.t.r.width,
&shape.t.r.height) != C_OK) return;
shape.type = RECTANGLE_TYPE;
bybox = 1;
i += 3;

View File

@ -85,20 +85,16 @@ uint8_t geohashEstimateStepsByRadius(double range_meters, double lat) {
/* Return the bounding box of the search area by shape (see geohash.h GeoShape)
* bounds[0] - bounds[2] is the minimum and maximum longitude
* while bounds[1] - bounds[3] is the minimum and maximum latitude.
* since the higher the latitude, the shorter the arc length, the box shape is as follows
* (left and right edges are actually bent), as shown in the following diagram:
*
* This function does not behave correctly with very large radius values, for
* instance for the coordinates 81.634948934258375 30.561509253718668 and a
* radius of 7083 kilometers, it reports as bounding boxes:
*
* min_lon 7.680495, min_lat -33.119473, max_lon 155.589402, max_lat 94.242491
*
* However, for instance, a min_lon of 7.680495 is not correct, because the
* point -1.27579540014266968 61.33421815228281559 is at less than 7000
* kilometers away.
*
* Since this function is currently only used as an optimization, the
* optimization is not used for very big radiuses, however the function
* should be fixed. */
* \-----------------/ -------- \-----------------/
* \ / / \ \ /
* \ (long,lat) / / (long,lat) \ \ (long,lat) /
* \ / / \ / \
* --------- /----------------\ /--------------\
* Northern Hemisphere Southern Hemisphere Around the equator
*/
int geohashBoundingBox(GeoShape *shape, double *bounds) {
if (!bounds) return 0;
double longitude = shape->xy[0];
@ -106,10 +102,14 @@ int geohashBoundingBox(GeoShape *shape, double *bounds) {
double height = shape->conversion * (shape->type == CIRCULAR_TYPE ? shape->t.radius : shape->t.r.height/2);
double width = shape->conversion * (shape->type == CIRCULAR_TYPE ? shape->t.radius : shape->t.r.width/2);
const double long_delta = rad_deg(width/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude)));
const double lat_delta = rad_deg(height/EARTH_RADIUS_IN_METERS);
bounds[0] = longitude - long_delta;
bounds[2] = longitude + long_delta;
const double long_delta_top = rad_deg(width/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude+lat_delta)));
const double long_delta_bottom = rad_deg(width/EARTH_RADIUS_IN_METERS/cos(deg_rad(latitude-lat_delta)));
/* The directions of the northern and southern hemispheres
* are opposite, so we choice different points as min/max long/lat */
int southern_hemisphere = latitude < 0 ? 1 : 0;
bounds[0] = southern_hemisphere ? longitude-long_delta_bottom : longitude-long_delta_top;
bounds[2] = southern_hemisphere ? longitude+long_delta_bottom : longitude+long_delta_top;
bounds[1] = latitude - lat_delta;
bounds[3] = latitude + lat_delta;
return 1;
@ -137,12 +137,10 @@ GeoHashRadius geohashCalculateAreasByShapeWGS84(GeoShape *shape) {
double latitude = shape->xy[1];
/* radius_meters is calculated differently in different search types:
* 1) CIRCULAR_TYPE, just use radius.
* 2) RECTANGLE_TYPE, in order to calculate accurately, we should use
* sqrt((width/2)^2 + (height/2)^2), so that the box is bound by a circle,
* But the current code a simpler approach resulting in a smaller circle,
* which is safe because we search the 8 nearby boxes anyway. */
* 2) RECTANGLE_TYPE, we use sqrt((width/2)^2 + (height/2)^2) to
* calculate the distance from the center point to the corner */
double radius_meters = shape->type == CIRCULAR_TYPE ? shape->t.radius :
shape->t.r.width > shape->t.r.height ? shape->t.r.width/2 : shape->t.r.height/2;
sqrt((shape->t.r.width/2)*(shape->t.r.width/2) + (shape->t.r.height/2)*(shape->t.r.height/2));
radius_meters *= shape->conversion;
steps = geohashEstimateStepsByRadius(radius_meters,latitude);
@ -245,14 +243,21 @@ int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
return geohashGetDistanceIfInRadius(x1, y1, x2, y2, radius, distance);
}
/* Judge whether a point is in the axis-aligned rectangle.
* bounds : see geohash.h GeoShape::bounds
/* Judge whether a point is in the axis-aligned rectangle, when the distance
* between a searched point and the center point is less than or equal to
* height/2 or width/2 in height and width, the point is in the rectangle.
*
* width_m, height_m: the rectangle
* x1, y1 : the center of the box
* x2, y2 : the point to be searched
*/
int geohashGetDistanceIfInRectangle(double *bounds, double x1, double y1,
int geohashGetDistanceIfInRectangle(double width_m, double height_m, double x1, double y1,
double x2, double y2, double *distance) {
if (x2 < bounds[0] || x2 > bounds[2] || y2 < bounds[1] || y2 > bounds[3]) return 0;
double lon_distance = geohashGetDistance(x2, y2, x1, y2);
double lat_distance = geohashGetDistance(x2, y2, x2, y1);
if (lon_distance > width_m/2 || lat_distance > height_m/2) {
return 0;
}
*distance = geohashGetDistance(x1, y1, x2, y2);
return 1;
}

View File

@ -60,7 +60,7 @@ int geohashGetDistanceIfInRadius(double x1, double y1,
int geohashGetDistanceIfInRadiusWGS84(double x1, double y1, double x2,
double y2, double radius,
double *distance);
int geohashGetDistanceIfInRectangle(double *bounds, double x1, double y1,
int geohashGetDistanceIfInRectangle(double width_m, double height_m, double x1, double y1,
double x2, double y2, double *distance);
#endif /* GEOHASH_HELPER_HPP_ */

View File

@ -1,6 +1,7 @@
# Helper functions to simulate search-in-radius in the Tcl side in order to
# verify the Redis implementation with a fuzzy test.
proc geo_degrad deg {expr {$deg*atan(1)*8/360}}
proc geo_degrad deg {expr {$deg*(atan(1)*8/360)}}
proc geo_raddeg rad {expr {$rad/(atan(1)*8/360)}}
proc geo_distance {lon1d lat1d lon2d lat2d} {
set lon1r [geo_degrad $lon1d]
@ -42,6 +43,34 @@ proc compare_lists {List1 List2} {
return $DiffList
}
# return true If a point in circle.
# search_lon and search_lat define the center of the circle,
# and lon, lat define the point being searched.
proc pointInCircle {radius_km lon lat search_lon search_lat} {
set radius_m [expr {$radius_km*1000}]
set distance [geo_distance $lon $lat $search_lon $search_lat]
if {$distance < $radius_m} {
return true
}
return false
}
# return true If a point in rectangle.
# search_lon and search_lat define the center of the rectangle,
# and lon, lat define the point being searched.
# error: can adjust the width and height of the rectangle according to the error
proc pointInRectangle {width_km height_km lon lat search_lon search_lat error} {
set width_m [expr {$width_km*1000*$error/2}]
set height_m [expr {$height_km*1000*$error/2}]
set lon_distance [geo_distance $lon $lat $search_lon $lat]
set lat_distance [geo_distance $lon $lat $lon $search_lat]
if {$lon_distance > $width_m || $lat_distance > $height_m} {
return false
}
return true
}
# The following list represents sets of random seed, search position
# and radius that caused bugs in the past. It is used by the randomized
# test later as a starting point. When the regression vectors are scanned
@ -225,19 +254,26 @@ start_server {tags {"geo"}} {
test {GEOSEARCH non square, long and narrow} {
r del Sicily
r geoadd Sicily 12.75 37.00 "test1"
r geoadd Sicily 12.75 36.995 "test1"
r geoadd Sicily 12.75 36.50 "test2"
r geoadd Sicily 13.00 36.50 "test3"
# box height=2km width=400km
set ret1 [r geosearch Sicily fromlonlat 15 37 bybox 2 400 km]
set ret1 [r geosearch Sicily fromlonlat 15 37 bybox 400 2 km]
assert_equal $ret1 {test1}
# Add a western Hemisphere point
r geoadd Sicily -1 37.00 "test3"
set ret2 [r geosearch Sicily fromlonlat 15 37 bybox 2 3000 km asc]
set ret2 [r geosearch Sicily fromlonlat 15 37 bybox 3000 2 km asc]
assert_equal $ret2 {test1 test3}
}
test {GEOSEARCH corner point test} {
r del Sicily
r geoadd Sicily 12.758489 38.788135 edge1 17.241510 38.788135 edge2 17.250000 35.202000 edge3 12.750000 35.202000 edge4 12.748489955781654 37 edge5 15 38.798135872540925 edge6 17.251510044218346 37 edge7 15 35.201864127459075 edge8 12.692799634687903 38.798135872540925 corner1 12.692799634687903 38.798135872540925 corner2 17.200560937451133 35.201864127459075 corner3 12.799439062548865 35.201864127459075 corner4
set ret [lsort [r geosearch Sicily fromlonlat 15 37 bybox 400 400 km asc]]
assert_equal $ret {edge1 edge2 edge5 edge7}
}
test {GEORADIUSBYMEMBER withdist (sorted)} {
r georadiusbymember nyc "wtc one" 7 km withdist
} {{{wtc one} 0.0000} {{union square} 3.2544} {{central park n/q/r} 6.7000} {4545 6.1975} {{lic market} 6.8969}}
@ -360,12 +396,22 @@ start_server {tags {"geo"}} {
assert {[lindex $res 0] eq "Catania"}
}
test {GEOADD + GEORANGE randomized test} {
set attempt 30
test {GEOSEARCH the box spans -180° or 180°} {
r del points
r geoadd points 179.5 36 point1
r geoadd points -179.5 36 point2
assert_equal {point1 point2} [r geosearch points fromlonlat 179 37 bybox 400 400 km asc]
assert_equal {point2 point1} [r geosearch points fromlonlat -179 37 bybox 400 400 km asc]
}
foreach {type} {byradius bybox} {
test "GEOSEARCH fuzzy test - $type" {
if {$::accurate} { set attempt 300 } else { set attempt 30 }
while {[incr attempt -1]} {
set rv [lindex $regression_vectors $rv_idx]
incr rv_idx
set radius_km 0; set width_km 0; set height_km 0
unset -nocomplain debuginfo
set srand_seed [clock milliseconds]
if {$rv ne {}} {set srand_seed [lindex $rv 0]}
@ -375,33 +421,55 @@ start_server {tags {"geo"}} {
if {[randomInt 10] == 0} {
# From time to time use very big radiuses
set radius_km [expr {[randomInt 50000]+10}]
if {$type == "byradius"} {
set radius_km [expr {[randomInt 5000]+10}]
} elseif {$type == "bybox"} {
set width_km [expr {[randomInt 5000]+10}]
set height_km [expr {[randomInt 5000]+10}]
}
} else {
# Normally use a few - ~200km radiuses to stress
# test the code the most in edge cases.
if {$type == "byradius"} {
set radius_km [expr {[randomInt 200]+10}]
} elseif {$type == "bybox"} {
set width_km [expr {[randomInt 200]+10}]
set height_km [expr {[randomInt 200]+10}]
}
}
if {$rv ne {}} {
set radius_km [lindex $rv 1]
set width_km [lindex $rv 1]
set height_km [lindex $rv 1]
}
if {$rv ne {}} {set radius_km [lindex $rv 1]}
set radius_m [expr {$radius_km*1000}]
geo_random_point search_lon search_lat
if {$rv ne {}} {
set search_lon [lindex $rv 2]
set search_lat [lindex $rv 3]
}
lappend debuginfo "Search area: $search_lon,$search_lat $radius_km km"
lappend debuginfo "Search area: $search_lon,$search_lat $radius_km $width_km $height_km km"
set tcl_result {}
set argv {}
for {set j 0} {$j < 20000} {incr j} {
geo_random_point lon lat
lappend argv $lon $lat "place:$j"
set distance [geo_distance $lon $lat $search_lon $search_lat]
if {$distance < $radius_m} {
if {$type == "byradius"} {
if {[pointInCircle $radius_km $lon $lat $search_lon $search_lat]} {
lappend tcl_result "place:$j"
}
lappend debuginfo "place:$j $lon $lat [expr {$distance/1000}] km"
} elseif {$type == "bybox"} {
if {[pointInRectangle $width_km $height_km $lon $lat $search_lon $search_lat 1]} {
lappend tcl_result "place:$j"
}
}
lappend debuginfo "place:$j $lon $lat"
}
r geoadd mypoints {*}$argv
set res [lsort [r georadius mypoints $search_lon $search_lat $radius_km km]]
if {$type == "byradius"} {
set res [lsort [r geosearch mypoints fromlonlat $search_lon $search_lat byradius $radius_km km]]
} elseif {$type == "bybox"} {
set res [lsort [r geosearch mypoints fromlonlat $search_lon $search_lat bybox $width_km $height_km km]]
}
set res2 [lsort $tcl_result]
set test_result OK
@ -409,19 +477,28 @@ start_server {tags {"geo"}} {
set rounding_errors 0
set diff [compare_lists $res $res2]
foreach place $diff {
lassign [lindex [r geopos mypoints $place] 0] lon lat
set mydist [geo_distance $lon $lat $search_lon $search_lat]
set mydist [expr $mydist/1000]
if {$type == "byradius"} {
if {($mydist / $radius_km) > 0.999} {
incr rounding_errors
continue
}
if {$mydist < $radius_m} {
if {$mydist < [expr {$radius_km*1000}]} {
# This is a false positive for redis since given the
# same points the higher precision calculation provided
# by TCL shows the point within range
incr rounding_errors
continue
}
} elseif {$type == "bybox"} {
# we add 0.1% error for floating point calculation error
if {[pointInRectangle $width_km $height_km $lon $lat $search_lon $search_lat 1.001]} {
incr rounding_errors
continue
}
}
}
# Make sure this is a real error and not a rounidng issue.
@ -447,7 +524,6 @@ start_server {tags {"geo"}} {
set mydist [geo_distance $lon $lat $search_lon $search_lat]
set mydist [expr $mydist/1000]
puts "$place -> [r geopos mypoints $place] $mydist $where"
if {($mydist / $radius_km) > 0.999} {incr rounding_errors}
}
set test_result FAIL
}
@ -456,4 +532,86 @@ start_server {tags {"geo"}} {
}
set test_result
} {OK}
}
test {GEOSEARCH box edges fuzzy test} {
if {$::accurate} { set attempt 300 } else { set attempt 30 }
while {[incr attempt -1]} {
unset -nocomplain debuginfo
set srand_seed [clock milliseconds]
lappend debuginfo "srand_seed is $srand_seed"
expr {srand($srand_seed)} ; # If you need a reproducible run
r del mypoints
geo_random_point search_lon search_lat
set width_m [expr {[randomInt 10000]+10}]
set height_m [expr {[randomInt 10000]+10}]
set lat_delta [geo_raddeg [expr {$height_m/2/6372797.560856}]]
set long_delta_top [geo_raddeg [expr {$width_m/2/6372797.560856/cos([geo_degrad [expr {$search_lat+$lat_delta}]])}]]
set long_delta_middle [geo_raddeg [expr {$width_m/2/6372797.560856/cos([geo_degrad $search_lat])}]]
set long_delta_bottom [geo_raddeg [expr {$width_m/2/6372797.560856/cos([geo_degrad [expr {$search_lat-$lat_delta}]])}]]
# Total of 8 points are generated, which are located at each vertex and the center of each side
set points(north) [list $search_lon [expr {$search_lat+$lat_delta}]]
set points(south) [list $search_lon [expr {$search_lat-$lat_delta}]]
set points(east) [list [expr {$search_lon+$long_delta_middle}] $search_lat]
set points(west) [list [expr {$search_lon-$long_delta_middle}] $search_lat]
set points(north_east) [list [expr {$search_lon+$long_delta_top}] [expr {$search_lat+$lat_delta}]]
set points(north_west) [list [expr {$search_lon-$long_delta_top}] [expr {$search_lat+$lat_delta}]]
set points(south_east) [list [expr {$search_lon+$long_delta_bottom}] [expr {$search_lat-$lat_delta}]]
set points(south_west) [list [expr {$search_lon-$long_delta_bottom}] [expr {$search_lat-$lat_delta}]]
lappend debuginfo "Search area: geosearch mypoints fromlonlat $search_lon $search_lat bybox $width_m $height_m m"
set tcl_result {}
foreach name [array names points] {
set x [lindex $points($name) 0]
set y [lindex $points($name) 1]
r geoadd mypoints $x $y place:$name
lappend tcl_result "place:$name"
lappend debuginfo "geoadd mypoints $x $y place:$name"
}
set res2 [lsort $tcl_result]
# make the box larger by two meter in each direction to put the coordinate slightly inside the box.
set height_new [expr {$height_m+4}]
set width_new [expr {$width_m+4}]
set res [lsort [r geosearch mypoints fromlonlat $search_lon $search_lat bybox $width_new $height_new m]]
if {$res != $res2} {
set diff [compare_lists $res $res2]
lappend debuginfo "diff: $diff"
fail "place should be found, debuginfo: $debuginfo, height_new: $height_new width_new: $width_new"
}
# The width decreases and the height increases. Only north and south are found
set width_new [expr {$width_m-4}]
set height_new [expr {$height_m+4}]
set res [lsort [r geosearch mypoints fromlonlat $search_lon $search_lat bybox $width_new $height_new m]]
if {$res != {place:north place:south}} {
set diff [compare_lists $res $res2]
lappend debuginfo "diff: $diff"
fail "place should not be found, debuginfo: $debuginfo, height_new: $height_new width_new: $width_new"
}
# The width increases and the height decreases. Only ease and west are found
set width_new [expr {$width_m+4}]
set height_new [expr {$height_m-4}]
set res [lsort [r geosearch mypoints fromlonlat $search_lon $search_lat bybox $width_new $height_new m]]
if {$res != {place:east place:west}} {
set diff [compare_lists $res $res2]
lappend debuginfo "diff: $diff"
fail "place should not be found, debuginfo: $debuginfo, height_new: $height_new width_new: $width_new"
}
# make the box smaller by two meter in each direction to put the coordinate slightly outside the box.
set height_new [expr {$height_m-4}]
set width_new [expr {$width_m-4}]
set res [r geosearch mypoints fromlonlat $search_lon $search_lat bybox $width_new $height_new m]
if {$res != ""} {
lappend debuginfo "res: $res"
fail "place should not be found, debuginfo: $debuginfo, height_new: $height_new width_new: $width_new"
}
unset -nocomplain debuginfo
}
}
}