Sampler

Generate random sample locations.

Sampler

 Sampler (smp_areas:geopandas.geodataframe.GeoDataFrame)

Sample random locations in smp_areas.

	Type	Details
smp_areas	GeoDataFrame	Geographical area to sample from.
Returns	GeoDataFrame	loc_id, geometry (Point or MultiPoint).

Exported source

class Sampler:
    "Sample random locations in `smp_areas`."
    def __init__(self, 
                 smp_areas:gpd.GeoDataFrame, # Geographical area to sample from.
                ) -> gpd.GeoDataFrame: # loc_id, geometry (Point or MultiPoint).
        # Stores `smp_areas` on the instance as `self.smp_areas`.
        fc.store_attr()

    @property
    def loc_ids(self):
        "Values of the `loc_id` index/column of `smp_areas`; raises `ValueError` if they are not unique."
        ids = self.smp_areas.reset_index().loc_id.values
        if len(ids) != len(np.unique(ids)):
            # The column name is spelled out: the instance has no `loc_id_col`
            # attribute, so interpolating it would raise AttributeError instead.
            raise ValueError('loc_id column contains non-unique values.')
        return ids

    def sample(self, 
               n:np.ndarray, # Number of samples
               **kwargs
              ):
        "Draw `n[i]` random points in the i-th area; areas with `n[i] == 0` are left out of the result."
        # Coerce so that a plain list/tuple also yields an element-wise mask.
        n = np.asarray(n)
        keep = n != 0
        # `kwargs` are forwarded untouched to `sample_points` (e.g. `method='uniform'`).
        pts_gseries = self.smp_areas[keep].sample_points(n[keep], **kwargs)
        gdf_pts = gpd.GeoDataFrame(geometry=pts_gseries, index=pts_gseries.index)
        gdf_pts.index.name = 'loc_id'
        return gdf_pts

How to use:

# Build the sampling areas from a raster file.
# NOTE(review): `gridder` is defined elsewhere; presumably it returns one row per grid cell — confirm.
fname_raster = './files/ground-truth-02-4326-simulated.tif'
gdf_grid = gridder(fname_raster, nrows=10, ncols=10)

# One random sample count in [0, 10) per row of `gdf_grid`.
sampler = Sampler(gdf_grid)
n = np.random.randint(0, high=10, size=len(gdf_grid), dtype=int)

# `method='uniform'` is forwarded to the underlying point sampler.
gdf_samples = sampler.sample(n, method='uniform')
print(gdf_samples.head())

                                                 geometry
loc_id                                                   
0       MULTIPOINT (-1.21319 43.01176, -1.20744 43.005...
1       MULTIPOINT (-1.22320 43.03429, -1.21657 43.051...
2       MULTIPOINT (-1.22229 43.09713, -1.21626 43.078...
3       MULTIPOINT (-1.22233 43.12831, -1.22226 43.101...
4       MULTIPOINT (-1.21914 43.16511, -1.21857 43.168...

# Plot the sampled points, then overlay the grid boundaries on the same axes.
# NOTE(review): `red` and `black` are bare names, not string literals — presumably
# colour constants defined elsewhere; confirm they are in scope.
ax = gdf_samples.plot(markersize=1, c=red)
gdf_grid.boundary.plot(ax=ax, color=black, lw=0.5)
# Hide the axes frame and ticks.
ax.axis('off');

source

rank_to_sample

 rank_to_sample (ranks:numpy.ndarray, budget:int, min:int=0,
                 policy:str='Weighted')

Map ranks to number of samples to be collected

	Type	Default	Details
ranks	ndarray		Ranks sorted by `loc_id`s
budget	int		Total data collection budget available
min	int	0	Minimum of samples to be collected per area of interest
policy	str	Weighted	Policy used for mapping ranks to number of samples
Returns	ndarray		Number of samples per area of interest to be collected in the same order as ranks

Exported source

def rank_to_sample(ranks:np.ndarray, # Ranks sorted by `loc_id`s
                   budget:int, # Total data collection budget available
                   min:int=0, # Minimum of samples to be collected per area of interest
                  policy:str="Weighted" # Policy used for mapping ranks to number of samples
                  ) -> np.ndarray: # Number of samples per area of interest to be collected in the same order as ranks
    """Map ranks to number of samples to be collected.

    Policies (matched case-insensitively):

    - "Weighted": each area gets a share of `budget` proportional to `1/rank`,
      rounded to the nearest integer.
    - "quantiles": ranks are split at their 25/50/75/100 % quantiles; the four
      groups receive 50 %, 30 %, 20 % and 0 % of `budget`, each share divided
      by `len(ranks)/4` and truncated to an integer.

    In both cases every allocation is raised to at least `min`. Because of the
    rounding and the `min` floor, the total can differ from `budget`.

    Raises `ValueError` for an unknown policy, or for non-positive ranks with
    the "Weighted" policy (their inverse is not a usable weight).
    """
    # Accept any array-like (list, tuple, Series, ...), not only ndarrays.
    ranks = np.asarray(ranks)
    policy_key = policy.lower() if isinstance(policy, str) else policy
    if policy_key not in ("weighted", "quantiles"):
        raise ValueError(f'Policy {policy} not implemented.')

    # Nothing to allocate; also avoids dividing by zero in the quantiles policy.
    if ranks.size == 0:
        return np.array([], dtype=int)

    if policy_key == "weighted":
        if np.any(ranks <= 0):
            raise ValueError('Ranks must be strictly positive for the Weighted policy.')
        weights = 1 / ranks
        normalized_weights = weights / np.sum(weights)
        allocation = np.round(budget * normalized_weights).astype(int)
        return np.maximum(allocation, min)

    # "quantiles" policy: fixed budget share per rank quartile.
    n_quantile = len(ranks) / 4
    budget_shares = (0.5, 0.3, 0.20)
    # The last quartile (worst ranks) gets nothing before the `min` floor is applied.
    sampling_policy = np.array([int(budget * share / n_quantile) for share in budget_shares] + [0])

    # Upper bound of each quartile; `right=True` puts a rank equal to a
    # threshold in the lower quartile, so the maximum rank maps to index 3.
    quantiles_thresholds = np.quantile(ranks, [0.25, 0.5, 0.75, 1.0])
    quantile_indices = np.digitize(ranks, quantiles_thresholds, right=True)

    return np.maximum(sampling_policy[quantile_indices], min)