Count how often each point of a field lies inside a set of contour paths.

I work with 2D geographic data. I have a long list of contour paths. Now I want to determine, for each point in my domain, how many of the contours it lies inside (i.e. I want to compute the spatial frequency distribution of the features represented by the contours).

To illustrate what I want to do, here is the first very naive implementation:

    import numpy as np
    from shapely.geometry import Polygon, Point

    def comp_frequency(paths, lonlat):
        """
        - paths: list of contour paths, made up of (lon,lat) tuples
        - lonlat: array containing the lon/lat coordinates; shape (nx,ny,2)
        """
        frequency = np.zeros(lonlat.shape[:2])
        contours = [Polygon(path) for path in paths]

        # Very naive and accordingly slow implementation
        for (i, j), v in np.ndenumerate(frequency):
            pt = Point(lonlat[i, j, :])
            for contour in contours:
                if contour.contains(pt):
                    frequency[i, j] += 1

        return frequency

    lon = np.array([
        [-1.10e+1, -7.82e+0, -4.52e+0, -1.18e+0, 2.19e+0, 5.59e+0, 9.01e+0, 1.24e+1, 1.58e+1, 1.92e+1, 2.26e+1],
        [-1.20e+1, -8.65e+0, -5.21e+0, -1.71e+0, 1.81e+0, 5.38e+0, 8.97e+0, 1.25e+1, 1.61e+1, 1.96e+1, 2.32e+1],
        [-1.30e+1, -9.53e+0, -5.94e+0, -2.29e+0, 1.41e+0, 5.15e+0, 8.91e+0, 1.26e+1, 1.64e+1, 2.01e+1, 2.38e+1],
        [-1.41e+1, -1.04e+1, -6.74e+0, -2.91e+0, 9.76e-1, 4.90e+0, 8.86e+0, 1.28e+1, 1.67e+1, 2.06e+1, 2.45e+1],
        [-1.53e+1, -1.15e+1, -7.60e+0, -3.58e+0, 4.98e-1, 4.63e+0, 8.80e+0, 1.29e+1, 1.71e+1, 2.12e+1, 2.53e+1],
        [-1.66e+1, -1.26e+1, -8.55e+0, -4.33e+0, -3.00e-2, 4.33e+0, 8.73e+0, 1.31e+1, 1.75e+1, 2.18e+1, 2.61e+1],
        [-1.81e+1, -1.39e+1, -9.60e+0, -5.16e+0, -6.20e-1, 3.99e+0, 8.66e+0, 1.33e+1, 1.79e+1, 2.25e+1, 2.70e+1],
        [-1.97e+1, -1.53e+1, -1.07e+1, -6.10e+0, -1.28e+0, 3.61e+0, 8.57e+0, 1.35e+1, 1.84e+1, 2.33e+1, 2.81e+1],
        [-2.14e+1, -1.69e+1, -1.21e+1, -7.16e+0, -2.05e+0, 3.17e+0, 8.47e+0, 1.37e+1, 1.90e+1, 2.42e+1, 2.93e+1],
        [-2.35e+1, -1.87e+1, -1.36e+1, -8.40e+0, -2.94e+0, 2.66e+0, 8.36e+0, 1.40e+1, 1.97e+1, 2.52e+1, 3.06e+1],
        [-2.58e+1, -2.08e+1, -1.54e+1, -9.86e+0, -3.99e+0, 2.05e+0, 8.22e+0, 1.44e+1, 2.05e+1, 2.65e+1, 3.22e+1]])

    lat = np.array([
        [29.6, 30.3, 30.9, 31.4, 31.7, 32.0, 32.1, 32.1, 31.9, 31.6, 31.2],
        [32.4, 33.2, 33.8, 34.4, 34.7, 35.0, 35.1, 35.1, 34.9, 34.6, 34.2],
        [35.3, 36.1, 36.8, 37.3, 37.7, 38.0, 38.1, 38.1, 37.9, 37.6, 37.1],
        [38.2, 39.0, 39.7, 40.3, 40.7, 41.0, 41.1, 41.1, 40.9, 40.5, 40.1],
        [41.0, 41.9, 42.6, 43.2, 43.7, 44.0, 44.1, 44.0, 43.9, 43.5, 43.0],
        [43.9, 44.8, 45.6, 46.2, 46.7, 47.0, 47.1, 47.0, 46.8, 46.5, 45.9],
        [46.7, 47.7, 48.5, 49.1, 49.6, 49.9, 50.1, 50.0, 49.8, 49.4, 48.9],
        [49.5, 50.5, 51.4, 52.1, 52.6, 52.9, 53.1, 53.0, 52.8, 52.4, 51.8],
        [52.3, 53.4, 54.3, 55.0, 55.6, 55.9, 56.1, 56.0, 55.8, 55.3, 54.7],
        [55.0, 56.2, 57.1, 57.9, 58.5, 58.9, 59.1, 59.0, 58.8, 58.3, 57.6],
        [57.7, 59.0, 60.0, 60.8, 61.5, 61.9, 62.1, 62.0, 61.7, 61.2, 60.5]])

    lonlat = np.dstack((lon, lat))

    paths = [
        [(-1.71, 34.4), (1.81, 34.7), (5.15, 38.0), (4.9, 41.0), (4.63, 44.0),
         (-0.03, 46.7), (-4.33, 46.2), (-9.6, 48.5), (-8.55, 45.6), (-3.58, 43.2),
         (-2.91, 40.3), (-2.29, 37.3), (-1.71, 34.4)],
        [(0.976, 40.7), (-4.33, 46.2), (-0.62, 49.6), (3.99, 49.9), (4.33, 47.0),
         (4.63, 44.0), (0.976, 40.7)],
        [(2.9, 55.8), (2.37, 56.0), (8.47, 56.1), (3.17, 55.9), (-2.05, 55.6),
         (-1.28, 52.6), (-0.62, 49.6), (4.33, 47.0), (8.8, 44.1), (2.29, 44.0),
         (2.71, 43.9), (3.18, 46.5), (3.25, 49.4), (3.33, 52.4), (2.9, 55.8)],
        [(2.25, 35.1), (2.26, 38.1), (8.86, 41.1), (5.15, 38.0), (5.38, 35.0),
         (9.01, 32.1), (2.25, 35.1)]]

    frequency = comp_frequency(paths, lonlat)

Of course, this is about as inefficiently written as possible, with all the explicit loops, and accordingly it takes forever.

How can I do this efficiently?

Edit: Added some sample data on request. Please note that my real domain is larger by a factor of 150**2 (in terms of resolution), since I created the sample coordinates by slicing the original arrays: lon[::150].

3 answers

So, in the meantime I've found a good solution, thanks to a colleague who at some point implemented something similar (based on this blog post).

Old, very slow approach using shapely

First off, here is my reference implementation using shapely, which is just a slightly cleaned-up version of the first "naive" approach from the opening post. It is easy to understand and works, but is very slow.

    import numpy as np
    from shapely.geometry import Polygon, Point

    def spatial_contour_frequency_shapely(paths, lon, lat):
        frequency = np.zeros(lon.shape)
        contours = [Polygon(path) for path in paths]
        for (i, j), v in np.ndenumerate(frequency):
            pt = Point([lon[i, j], lat[i, j]])
            for contour in contours:
                if contour.contains(pt):
                    frequency[i, j] += 1
        return frequency

New, very fast solution using PIL

My (almost) final solution doesn't use shapely anymore, but instead uses image manipulation methods from PIL (the Python Imaging Library). This solution is much, much, much faster, although in this form only for regular rectangular grids (see the note below).

    import numpy as np
    from PIL import Image, ImageDraw
    from scipy.spatial import cKDTree

    def _spatial_contour_frequency_pil(paths, lon, lat, regular_grid=False,
                                       method_ind=None):

        def get_indices(points, lon, lat, tree=None, regular=False):

            def get_indices_regular(points, lon, lat):
                lon, lat = lon.T, lat.T

                def _get_ij(lon, lat, x, y):
                    lon0 = lon[0, 0]
                    lat0 = lat[0, 0]
                    lon1 = lon[-1, -1]
                    lat1 = lat[-1, -1]
                    nx, ny = lon.shape
                    dx = (lon1 - lon0)/nx
                    dy = (lat1 - lat0)/ny
                    i = int((x - lon0)/dx)
                    j = int((y - lat0)/dy)
                    return i, j

                return [_get_ij(lon, lat, x, y) for x, y in points]

            def get_indices_irregular(points, tree, shape):
                # Nearest-neighbor lookup of the grid indices of each vertex
                dist, idx = tree.query(points, k=1)
                ind = np.column_stack(np.unravel_index(idx, shape))
                return [(i, j) for i, j in ind]

            if regular:
                return get_indices_regular(points, lon, lat)
            return get_indices_irregular(points, tree, lon.T.shape)

        # Build the kd-tree once, outside of get_indices (see timings below)
        tree = None
        if not regular_grid:
            lonlat = np.column_stack((lon.T.ravel(), lat.T.ravel()))
            tree = cKDTree(lonlat)

        frequency = np.zeros(lon.shape)
        for path in paths:
            path_ij = get_indices(path, lon, lat, tree=tree, regular=regular_grid)

            # Rasterize the polygon into a grayscale image of the grid's size
            raster_poly = Image.new("L", lon.shape, 0)
            rasterize = ImageDraw.Draw(raster_poly)
            rasterize.polygon(path_ij, 1)

            # Read the image back as a 0/1 mask and accumulate it
            mask = np.frombuffer(raster_poly.tobytes(), dtype=np.int8)
            mask = mask.reshape(raster_poly.im.size[1], raster_poly.im.size[0])
            frequency += mask

        return frequency
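
To isolate the core trick: ImageDraw can fill a polygon given in integer grid-index space, and the resulting image reads back as a 0/1 mask. A minimal self-contained sketch (grid size and polygon indices are made up for illustration):

    import numpy as np
    from PIL import Image, ImageDraw

    # Made-up polygon in integer (i,j) grid-index space, for illustration only
    nx, ny = 11, 11
    path_ij = [(2, 2), (8, 3), (7, 8), (3, 7)]

    img = Image.new("L", (nx, ny), 0)             # blank grayscale image, all zeros
    ImageDraw.Draw(img).polygon(path_ij, fill=1)  # fill the polygon with ones
    mask = np.asarray(img)                        # (ny, nx) array of 0s and 1s
    print(mask)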

It should be noted that the results of the two approaches are not identical. The features identified by the PIL approach are slightly larger than those identified by the shapely approach, but in practice neither is better than the other.
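
To quantify the disagreement, a hypothetical spot check (this assumes the two functions above and the sample data from the question are defined in the same session):

    import numpy as np

    freq_shapely = spatial_contour_frequency_shapely(paths, lon, lat)
    freq_pil = _spatial_contour_frequency_pil(paths, lon, lat)

    # Count the grid points where the rasterized and the exact masks disagree
    diff = freq_pil - freq_shapely
    print("grid points that differ:", np.count_nonzero(diff))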

Timings

Below are some timings, created with a reduced real dataset (not the semi-artificial example data from the opening post):

    spatial_contour_frequency/shapely          : 191.8843
    spatial_contour_frequency/pil              :   0.3287
    spatial_contour_frequency/pil-tree-inside  :   2.3629
    spatial_contour_frequency/pil-regular_grid :   0.3276

The most time-consuming step is looking up the grid indices of the lon/lat contour points on the irregular grid. Most of that time is spent constructing the cKDTree, so I moved it out of get_indices. To put this in perspective, pil-tree-inside is the version in which the tree is created inside get_indices. pil-regular_grid is run with regular_grid=True, which for my dataset gives incorrect results, but gives an idea of how fast this would run on a regular grid.
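
For reference, a minimal sketch of how such timings can be collected with timeit (the number of repetitions and the reporting format here are assumptions, not the exact harness I used):

    import timeit

    cases = [
        ("shapely", lambda: spatial_contour_frequency_shapely(paths, lon, lat)),
        ("pil", lambda: _spatial_contour_frequency_pil(paths, lon, lat)),
        ("pil-regular_grid",
         lambda: _spatial_contour_frequency_pil(paths, lon, lat,
                                                regular_grid=True)),
    ]
    for name, func in cases:
        # One repetition per case; the real dataset dominates the runtime anyway
        print("spatial_contour_frequency/{:<17s}: {:9.4f}".format(
            name, timeit.timeit(func, number=1)))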

Overall I've managed to pretty much eliminate the effect of the irregular grid (pil vs. pil-regular_grid), which is as much as I could have hoped for at the beginning! :)


If your input polygons are actually contours, you're better off working directly with your gridded input data than computing contour polygons and testing whether points lie inside them.

Contours correspond to constant values of the gridded data. Each contour is a polygon enclosing the regions of the input grid that exceed that value.

If you need to know how many contours a given point lies inside, it's much faster to sample the input grid at the point's location and use the returned "z" value. The number of contours the point is inside follows directly from that value, if you know which values the contours were created at.

For instance:

    import numpy as np
    from scipy.interpolate import RegularGridInterpolator
    import matplotlib.pyplot as plt

    # One of your input gridded datasets
    y, x = np.mgrid[-5:5:100j, -5:5:100j]
    z = np.sin(np.hypot(x, y)) + np.hypot(x, y) / 10

    contour_values = [-1, -0.5, 0, 0.5, 1, 1.5, 2]

    # A point location...
    x0, y0 = np.random.normal(0, 2, 2)

    # Visualize what's happening...
    fig, ax = plt.subplots()
    cont = ax.contourf(x, y, z, contour_values, cmap='gist_earth')
    ax.plot([x0], [y0], marker='o', ls='none', color='salmon', ms=12)
    fig.colorbar(cont)

    # Instead of working with whether or not the point intersects the
    # contour polygons we generated, we'll turn the problem on its head:
    # Sample the grid at the point location
    interp = RegularGridInterpolator((x[0, :], y[:, 0]), z)
    z0 = interp([x0, y0])

    # How many contours would the point be inside?
    num_inside = sum(z0 > c for c in contour_values)[0]

    ax.set(title='Point is inside {} contours'.format(num_inside))
    plt.show()

(Figure: filled contour plot of z with the randomly chosen point marked; the title reports how many contours the point lies inside.)
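
The same trick extends to many query points at once. A self-contained sketch (reusing the synthetic grid from above) that vectorizes the counting with np.searchsorted:

    import numpy as np
    from scipy.interpolate import RegularGridInterpolator

    # Same synthetic gridded dataset as in the example above
    y, x = np.mgrid[-5:5:100j, -5:5:100j]
    z = np.sin(np.hypot(x, y)) + np.hypot(x, y) / 10
    contour_values = [-1, -0.5, 0, 0.5, 1, 1.5, 2]

    interp = RegularGridInterpolator((x[0, :], y[:, 0]), z)

    # Ten random query points, clipped to stay inside the grid bounds
    pts = np.clip(np.random.normal(0, 2, (10, 2)), -4.99, 4.99)
    z_pts = interp(pts)

    # For each sampled z, count the contour levels strictly below it --
    # exactly the number of contours the point lies inside
    num_inside = np.searchsorted(contour_values, z_pts)
    print(num_inside)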

    import operator

    import numpy as np
    from shapely.geometry import Polygon, Point

    def comp_frequency_lc(paths, lonlat):
        frequency = np.zeros(lonlat.shape[:2])
        contours = [Polygon(path) for path in paths]
        for (i, j), v in np.ndenumerate(frequency):
            pt = Point(lonlat[i, j, :])
            # Replace the inner for loop with a list comprehension
            [operator.setitem(frequency, (i, j),
                              operator.getitem(frequency, (i, j)) + 1)
             for contour in contours if contour.contains(pt)]
        return frequency

    print(comp_frequency_lc(paths, lonlat))

which results in:

    [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
     [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
     [ 0.  0.  0.  0.  1.  0.  0.  1.  2.  2.  2.]
     [ 0.  1.  0.  0.  1.  0.  0.  1.  1.  1.  1.]
     [ 0.  2.  0.  0.  2.  0.  0.  2.  2.  2.  1.]
     [ 0.  2.  0.  0.  1.  0.  0.  1.  1.  1.  2.]
     [ 0.  1.  0.  0.  0.  0.  0.  1.  2.  1.  1.]
     [ 0.  1.  1.  0.  0.  0.  0.  1.  1.  0.  0.]
     [ 0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
     [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
     [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]

Here are the timing results, where comp_frequency_lc is the version that uses the list comprehension:

    # 10000 runs
    # comp_frequency     185.10419257196094
    # comp_frequency_lc  124.42639064462102
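
As an aside, current shapely releases (2.x) expose vectorized point-in-polygon predicates that remove the Python-level point loop entirely; a sketch under that assumption (shapely.contains_xy requires shapely >= 2.0):

    import numpy as np
    import shapely
    from shapely.geometry import Polygon

    def comp_frequency_vec(paths, lonlat):
        # Test all grid points against each contour in one vectorized call
        xs = lonlat[..., 0].ravel()
        ys = lonlat[..., 1].ravel()
        frequency = np.zeros(lonlat.shape[:2])
        for path in paths:
            inside = shapely.contains_xy(Polygon(path), xs, ys)
            frequency += inside.reshape(lonlat.shape[:2])
        return frequency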
