import numpy as np #~ from numpy cimport * cimport numpy as np cimport cython from cython.parallel import prange, parallel, threadid from libc.stdlib cimport malloc, free, calloc #~ long random() cdef import from "math.h": double log10(double) nogil double log(double) nogil double exp(double) nogil double sqrt(double) nogil double fabs(double) nogil int abs(int) nogil double floor(double) nogil double powf(double, double) nogil # External definitions from openmp headers cimport openmp cdef import from "omp.h": void omp_set_num_threads(int num_threads) ctypedef struct omp_lock_t: pass # External definitions from numpy headers cdef extern from "numpy/arrayobject.h": ctypedef int npy_intp ctypedef int intp ctypedef extern class numpy.ndarray [object PyArrayObject]: cdef char *data cdef int nd cdef intp *dimensions cdef intp *strides cdef int flags #~ void* PyArray_GetPtr(ndarray aobj, npy_intp* ind) nogil void* PyArray_GETPTR2(ndarray m, npy_intp i, npy_intp j) nogil void* PyArray_GETPTR4(ndarray m, npy_intp i, npy_intp j, npy_intp k, npy_intp l) nogil cdef double INFINITY = np.inf cdef double NEG_INFINITY = -np.inf cdef extern from "box_triangle_intersect.c": int triBoxOverlap(float, float, float, float, float, float, float, float, float, float, float, float, float, float, float) nogil import time from random import random import logging from itertools import product import math _log = logging.getLogger() # cdef enum: UN_BLOCKED = 0 BLOCKED = 1 UNPASSABLE = 2 AIR = 3 # viterbi decoder pruning constants cdef enum: PRUNED = 1 UNPRUNED = 0 cdef class SortedXYZList: ''' skiplist like structure that ensures inserted elements are always sorted and that removes the smallest entries if capacity is exceeded http://igoro.com/archive/skip-lists-are-fascinating/ http://code.activestate.com/recipes/576930-efficient-running-median-using-an-indexable-skipli/ ''' cdef public: double minimum char isfilled cdef: int maxlevels, capacity, insertion_count int *nodelinks double *nodevalues int *xyz int fill def __init__(self, int capacity): self.fill = 2 # start at position 2 as the head and the tail are allready in lists self.maxlevels = int(1 + log(capacity) / log(2)) self.capacity = capacity + 2 self.clear(True) @cython.cdivision(True) cdef void insert(SortedXYZList self, double value, int x, int y, int z) nogil: cdef int level, maxlevels, curr, next, new, smallest cdef int offset_curr, offset_smallest, offset_xyz maxlevels = self.maxlevels if not self.isfilled: # insert a new element new = self.fill self.fill += 1 if self.fill == self.capacity: smallest = self.nodelinks[0] # the head node points to the smallest element offset_smallest = smallest * maxlevels # update the minimum to the value of the smallest element self.minimum = self.nodevalues[self.nodelinks[offset_smallest]] # check if curent value is even smaller if value < self.minimum: self.minimum = value self.isfilled = 1 # update minimum #~ if value < self.minimum: #~ self.minimum = value else: if value <= self.minimum: # nothing to do - we will not insert a element # that is too small return # replace the smallest element smallest = self.nodelinks[0] # the head node points to the smallest element offset_smallest = smallest * maxlevels # update the minimum to the value of the second-smallest element self.minimum = self.nodevalues[self.nodelinks[offset_smallest]] if value < self.minimum: # the new incoming value is still smaller than the *currently* second smallest self.minimum = value # now update all next-links on head node to the outgoing links of the "node to be removed = the smallest" for level in range(maxlevels): next_on_level = self.nodelinks[offset_smallest + level] if next_on_level > 0: # will be 0 if the "historical" level_for_insert is reached self.nodelinks[level] = next_on_level # clear old entries! self.nodelinks[offset_smallest + level] = 0 else: break # the new node will get the position of the smallest node (in the array) new = smallest # With a probability of 1/2, make the node a part of the lowest-level list only. # With 1/4 probability, the node will be a part of the lowest two lists. # With 1/8 probability, the node will be a part of three lists. And so forth. cdef int level_for_insert, i #~ level_for_insert = min(self.maxlevels, 1 - int(log(random()) / log(2))) for i in range(self.maxlevels - 1, -1, -1): if self.insertion_count % (1 << i) == 0: level_for_insert = i + 1 break self.insertion_count += 1 curr = 0 # the head node self.nodevalues[new] = value # store xyz values offset_xyz = new*3 self.xyz[offset_xyz] = x self.xyz[offset_xyz + 1] = y self.xyz[offset_xyz + 2] = z for level in range(self.maxlevels - 1, -1, -1): offset_curr = curr * maxlevels + level next = self.nodelinks[offset_curr] while value > self.nodevalues[next]: curr = next offset_curr = curr * maxlevels + level next = self.nodelinks[offset_curr] if level < level_for_insert: self.nodelinks[new * maxlevels + level] = self.nodelinks[offset_curr] self.nodelinks[offset_curr] = new def getall(self): ''' debug function''' cdef int curr = 0, next, offset_curr = curr * self.maxlevels next = self.nodelinks[offset_curr] while self.nodevalues[next] < INFINITY: v = self.nodevalues[self.nodelinks[curr * self.maxlevels]] curr = next print v, curr, self.nodelinks[curr * self.maxlevels] next = self.nodelinks[curr * self.maxlevels] @cython.boundscheck(False) def xyzvAsArray(self): '''transform stored list to ascending sorted np.array''' cdef np.ndarray[np.double_t, ndim=2] result = np.zeros((self.fill - 2, 4), dtype=np.double) cdef int i, offset cdef int curr = 0, next, offset_curr = curr * self.maxlevels next = self.nodelinks[offset_curr] i = 0 while self.nodevalues[next] < INFINITY: offset = self.nodelinks[curr * self.maxlevels] * 3 result[i, 0] = self.xyz[offset] result[i, 1] = self.xyz[offset+1] result[i, 2] = self.xyz[offset+2] result[i, 3] = self.nodevalues[self.nodelinks[curr * self.maxlevels]] curr = next next = self.nodelinks[curr * self.maxlevels] i += 1 return result @cython.boundscheck(False) def particleDataAsArray(self): '''transform stored list to ascending sorted np.array for ParticleFilter''' cdef np.ndarray[np.double_t, ndim=2] result = np.zeros((4, self.fill - 2), dtype=np.double) cdef int i, offset, sum_x = 0, sum_y = 0, sum_z = 0 cdef int curr = 0, next, offset_curr = curr * self.maxlevels cdef double w, sum_w = 0 next = self.nodelinks[offset_curr] i = 0 while self.nodevalues[next] < INFINITY: offset = self.nodelinks[curr * self.maxlevels] * 3 result[0, i] = self.xyz[offset] result[1, i] = self.xyz[offset+1] result[2, i] = self.xyz[offset+2] sum_x += self.xyz[offset] sum_y += self.xyz[offset+1] sum_z += self.xyz[offset+2] w = self.nodevalues[self.nodelinks[curr * self.maxlevels]] sum_w += w result[3, i] = w curr = next next = self.nodelinks[curr * self.maxlevels] i += 1 return result, sum_w, sum_x, sum_y, sum_z @cython.boundscheck(False) cdef int updatePruningArray(SortedXYZList self, np.ndarray[np.int8_t, ndim=3] pruned, int t_shape_x, int t_shape_y, int t_shape_z, int s_shape_x, int s_shape_y, int s_shape_z, int offset_x, int offset_y, int offset_z): ''' this should be a method of the ViterbiDecoder but have no clue how to get access to self.xyz''' cdef int i, offset, sx, sy, sz, tx, ty, tz cdef int prev_idx_x, prev_idx_y, prev_idx_z # skip first 2 entries (head/tail) for i in prange(2, self.fill, nogil=True): offset = i*3 sx = self.xyz[offset] sy = self.xyz[offset+1] sz = self.xyz[offset+2] # now unprune environment for tx in range(t_shape_x): prev_idx_x = sx + tx - offset_x if not 0 <= prev_idx_x < s_shape_x: continue for ty in range(t_shape_y): prev_idx_y = sy + ty - offset_y if not 0 <= prev_idx_y < s_shape_y: continue for tz in range(t_shape_z): prev_idx_z = sz + tz - offset_z if not 0 <= prev_idx_z < s_shape_z: continue # do not overwrite other PRUNED marker #~ if pruned[prev_idx_x, prev_idx_y, prev_idx_z] == PRUNED: pruned[prev_idx_x, prev_idx_y, prev_idx_z] = UNPRUNED return i cdef clear(SortedXYZList self, realloc=False): cdef int i if realloc: self.nodelinks = calloc(self.capacity * self.maxlevels, sizeof(int)) self.nodevalues = calloc(self.capacity, sizeof(double)) self.xyz = calloc(self.capacity * 3, sizeof(int)) else: # only set links to 0 for i in range(self.capacity * self.maxlevels): self.nodelinks[i] = 0 # start at position 2 as the head and the tail are allready in lists self.fill = 2 self.isfilled = 0 # initialize head node for i in range(self.maxlevels): self.nodelinks[0 + i] = 1 # initialize tail node self.nodevalues[1] = INFINITY self.insertion_count = 1 # used for generating probabilities # will be set automatically to valid values if initial loading happens self.minimum = NEG_INFINITY def __dealloc__(SortedXYZList self): free(self.nodelinks) free(self.nodevalues) @cython.boundscheck(False) cdef inline double emission_all(int numaps, double* mus, double* xs, double no_signal_delta) nogil: cdef double emission_cost = 0.0, mu cdef int j for j in range(numaps): mu = mus[j] #~ sigma = eprobs[j*2+1] if xs[j] == 2: continue elif xs[j] == 1: # no measurement if mu > -100: emission_cost = emission_cost + no_signal_delta continue emission_cost = emission_cost + fabs(mu - xs[j]) return emission_cost @cython.boundscheck(False) cdef inline double emission_euclid_all(int numaps, double* mus, double* xs, double no_signal_delta) nogil: cdef double emission_cost = 0.0, mu, cost = 0.0 cdef int j for j in range(numaps): mu = mus[j] if xs[j] == 2: continue elif xs[j] == 1: # no measurement if mu > -100: emission_cost = emission_cost + no_signal_delta * no_signal_delta continue cost = mu - xs[j] emission_cost = emission_cost + cost * cost return sqrt(emission_cost) @cython.boundscheck(False) cdef inline double emission_top3_mu(int numaps, double* mus, double* xs) nogil: cdef double emission_cost = 0.0, mu cdef int j cdef double top1_e, top2_e, top3_e cdef double top1_mu, top2_mu, top3_mu top1_mu = top2_mu = top3_mu = -1000 for j in range(numaps): mu = mus[j] #~ sigma = eprobs[j*2+1] if xs[j] == 2: continue elif xs[j] == 1: # no measurement if mu > -100: emission_cost = emission_cost + 4 continue emission_cost = fabs(mu - xs[j]) if mu > top1_mu: top3_mu = top2_mu top2_mu = top1_mu top1_mu = mu top3_e = top2_e top2_e = top1_e top1_e = emission_cost elif mu > top2_mu: top3_mu = top2_mu top2_mu = mu top3_e = top2_e top2_e = emission_cost elif mu > top3_mu: top3_mu = mu top3_e = emission_cost emission_cost = top1_e + top2_e + top3_e return emission_cost @cython.boundscheck(False) cdef inline double emission_top3_deltas(int numaps, double* mus, double* xs) nogil: ''' use top3 costly deltas ''' cdef int j cdef double top1_e = 0, top2_e = 0, top3_e = 0 for j in range(numaps): if xs[j] > 0: # no measurement (1) or globally unseen ap (2) continue emission_cost = fabs(mus[j] - xs[j]) if emission_cost > top1_e: top3_e = top2_e top2_e = top1_e top1_e = emission_cost elif emission_cost > top2_e: top3_e = top2_e top2_e = emission_cost elif emission_cost > top3_e: top3_e = emission_cost #~ return sqrt(top1_e * top1_e + top2_e * top2_e + top3_e * top3_e) return top1_e + top2_e + top3_e @cython.boundscheck(False) cdef inline double emission_euclid_top3_deltas(int numaps, double* mus, double* xs) nogil: ''' use top3 costly deltas ''' cdef int j cdef double top1_e = 0, top2_e = 0, top3_e = 0 for j in range(numaps): if xs[j] > 0: # no measurement (1) or globally unseen ap (2) continue emission_cost = fabs(mus[j] - xs[j]) if emission_cost > top1_e: top3_e = top2_e top2_e = top1_e top1_e = emission_cost elif emission_cost > top2_e: top3_e = top2_e top2_e = emission_cost elif emission_cost > top3_e: top3_e = emission_cost return sqrt(top1_e * top1_e + top2_e * top2_e + top3_e * top3_e) cdef class ViterbiDecoder: cdef public verbose, store_pruning cdef public list estimated_paths cdef public list history cdef public double no_signal_delta cdef: int s_shape_x, s_shape_y, s_shape_z int t_shape_x, t_shape_y, t_shape_z int offset_x, offset_y, offset_z long numstates int len_measurements int emissionCostMode SortedXYZList pl np.ndarray blockedCubes np.ndarray emission_probs np.ndarray transition_probs np.ndarray last_probs def __init__(self, np.ndarray[np.int_t, ndim=3] blockedCubes, np.ndarray[np.double_t, ndim=5] emission_probs, np.ndarray[np.double_t, ndim=6] transition_probs, verbose=True, store_pruning=False, no_signal_delta=10): self.verbose = verbose self.blockedCubes = blockedCubes self.emission_probs = emission_probs self.transition_probs = transition_probs self.s_shape_x = emission_probs.shape[0] self.s_shape_y = emission_probs.shape[1] self.s_shape_z = emission_probs.shape[2] self.t_shape_x = transition_probs.shape[3] self.t_shape_y = transition_probs.shape[4] self.t_shape_z = transition_probs.shape[5] self.estimated_paths = [] self.store_pruning = store_pruning self.history = [] self.no_signal_delta = no_signal_delta #~ self.emissionCostMode = 1 # manhattan top 3 self.emissionCostMode = 3 # euclid all #~ self.emissionCostMode = 4 # euclid top 3 #cdef np.ndarray[np.double_t, ndim=3] self.last_probs = np.empty((self.s_shape_x, self.s_shape_y, self.s_shape_z), dtype=np.double) @cython.boundscheck(False) def getUnblockedIndices(self): cdef np.ndarray[np.int_t, ndim=3] blockedCubes = self.blockedCubes cdef np.ndarray[np.int_t, ndim=2] indices = np.zeros((self.s_shape_x * self.s_shape_y * self.s_shape_z, 3), dtype=np.int) cdef int i, sx, sy, sz i = 0 for sx in range(self.s_shape_x): for sy in range(self.s_shape_y): for sz in range(self.s_shape_z): if blockedCubes[sx, sy, sz] == UN_BLOCKED: indices[i, 0] = sx indices[i, 1] = sy indices[i, 2] = sz i = i + 1 indices = np.resize(indices, (i, 3)) return indices cdef inline double _getEmissionCost(ViterbiDecoder self, int numaps, double* mus, double* xs) nogil: ''' calc emission cost for vector of rss values and vector of mus''' if self.emissionCostMode == 0: return emission_all(numaps, mus, xs, self.no_signal_delta) elif self.emissionCostMode == 1: return emission_top3_deltas(numaps, mus, xs) elif self.emissionCostMode == 2: return emission_top3_mu(numaps, mus, xs) elif self.emissionCostMode == 3: return emission_euclid_all(numaps, mus, xs, self.no_signal_delta) elif self.emissionCostMode == 4: return emission_euclid_top3_deltas(numaps, mus, xs) def getEmissionCost(self, np.ndarray[np.double_t, ndim=1] mus, np.ndarray[np.double_t, ndim=1] xs): ''' pure python access to emission costs''' res = self._getEmissionCost(len(mus), mus.data, xs.data) return res @cython.boundscheck(False) @cython.cdivision(True) def decode_with_pruning(self, np.ndarray[np.double_t, ndim=2] measurements, np.ndarray[np.double_t, ndim=3] curr_probs, np.ndarray[np.uint8_t, ndim=4] back_track, prune_to, int continueDecoding): ''' only use the TOP-N hypothesis during search (given by prune_to)''' cdef np.ndarray[np.double_t, ndim=3] last_probs = self.last_probs cdef int s_shape_x=self.s_shape_x, s_shape_y=self.s_shape_y, s_shape_z=self.s_shape_z cdef int t_shape_x=self.t_shape_x, t_shape_y=self.t_shape_y, t_shape_z=self.t_shape_z cdef int offset_x=self.offset_x, offset_y=self.offset_y, offset_z=self.offset_z cdef double mu, sq_sigma, x, transition_cost, min_transition_cost, emission_cost, cost, prev_cost cdef int sx, sy, sz, tx, ty, tz, offset, shape_idx, best_prev_state=0, i, j, k, numaps=measurements.shape[1] cdef int loggging_at = max(1, self.len_measurements / 10) cdef int num_unpruned, num_emitted = 0 cdef SortedXYZList pl = SortedXYZList(prune_to) cdef np.ndarray[np.double_t, ndim=2] xyz_a cdef double sum_x, sum_y, sum_z cdef omp_lock_t mylock openmp.omp_init_lock(&mylock) # initialize cdef np.ndarray[np.int8_t, ndim=3] pruned = np.zeros((s_shape_x, s_shape_y, s_shape_z), dtype=np.int8) cdef np.ndarray[np.int_t, ndim=3] blockedCubes = self.blockedCubes cdef np.ndarray[np.double_t, ndim=5] emission_probs = self.emission_probs cdef np.ndarray[np.double_t, ndim=6] transition_probs = self.transition_probs # start only with unblocked states cdef np.ndarray[np.int_t, ndim=2] indices = self.getUnblockedIndices() num_unpruned = len(indices) seq_result = [] avg_seq_result = [] sum_time2 = sum_time1 = 0 t = time.time() for i in range(0, self.len_measurements): #~ t2 = time.time() if self.verbose and (i % loggging_at == 0 or i == 0 or i == self.len_measurements - 1): _log.debug('calc probs col %s of %s [%dk states/sec] [%s]' % (i, self.len_measurements, (self.numstates / 1000.0 * i) / (time.time() - t + 0.000001), num_unpruned)) for k in prange(num_unpruned, nogil=True): #~ tid = threadid() sx = indices[k, 0] sy = indices[k, 1] sz = indices[k, 2] #~ emission_cost = self._getEmissionCost() emission_cost = self._getEmissionCost(numaps, PyArray_GETPTR4(emission_probs, sx, sy, sz, 0), PyArray_GETPTR2(measurements, i, 0)) if i == 0 and continueDecoding == 0: # first column only emissions curr_probs[sx, sy, sz] = emission_cost if -emission_cost > pl.minimum: openmp.omp_set_lock(&mylock) # acquire pl.insert(-emission_cost, sx, sy, sz) openmp.omp_unset_lock(&mylock) # release continue min_transition_cost = INFINITY shape_idx = -1 with gil: for tx in range(t_shape_x): prev_idx_x = sx + tx - offset_x if not 0 <= prev_idx_x < s_shape_x: shape_idx = shape_idx + t_shape_y * t_shape_z continue for ty in range(t_shape_y): prev_idx_y = sy + ty - offset_y if not 0 <= prev_idx_y < s_shape_y: shape_idx = shape_idx + t_shape_z continue for tz in range(t_shape_z): shape_idx = shape_idx + 1 prev_idx_z = sz + tz - offset_z if not 0 <= prev_idx_z < s_shape_z: continue prev_cost = last_probs[prev_idx_x, prev_idx_y, prev_idx_z] transition_cost = transition_probs[sx, sy, sz, tx, ty, tz] if transition_cost == INFINITY: cost = INFINITY else: cost = prev_cost + transition_cost# * 0 if cost < min_transition_cost: min_transition_cost = cost best_prev_state = shape_idx cost = min_transition_cost + emission_cost curr_probs[sx, sy, sz] = cost if i > 0: # continueDecoding == 1 back_track[i, sx, sy, sz] = best_prev_state if -cost > pl.minimum: openmp.omp_set_lock(&mylock) # acquire pl.insert(-cost, sx, sy, sz) openmp.omp_unset_lock(&mylock) # release #~ sum_time1 += (time.time() - t2) #~ t2 = time.time() # toggle arrays last_probs, curr_probs = curr_probs, last_probs curr_probs.fill(INFINITY) pruned.fill(PRUNED) #~ if self.store_pruning: xyz_a = pl.xyzvAsArray() sum_x = sum_y = sum_z = 0 for k in range(len(xyz_a)-50, len(xyz_a)): sum_x += xyz_a[k, 0] sum_y += xyz_a[k, 1] sum_z += xyz_a[k, 2] seq_result.append(tuple(xyz_a[-1][:3])) avg_seq_result.append((sum_x / 50.0, sum_y / 50.0, sum_z / 50.0)) self.history.append(xyz_a) pl.updatePruningArray(pruned, t_shape_x, t_shape_y, t_shape_z, s_shape_x, s_shape_y, s_shape_z, offset_x, offset_y, offset_z) # not parallelizable num_unpruned = 0 for sx in range(s_shape_x): for sy in range(s_shape_y): for sz in range(s_shape_z): if pruned[sx, sy, sz] != PRUNED: indices[num_unpruned, 0] = sx indices[num_unpruned, 1] = sy indices[num_unpruned, 2] = sz num_unpruned = num_unpruned + 1 # maybe we get a speedup by reducing the memory that has to be hold in cpu cache if len(indices) > num_unpruned * 3: indices = np.resize(indices, (num_unpruned * 2, 3)) pl.clear() #~ sum_time2 += (time.time() - t2) openmp.omp_destroy_lock(&mylock) # deallocate the lock self.last_probs[:] = last_probs #~ print 'phase1: %.3f phase2: %.3f' % (sum_time1, sum_time2) return seq_result, avg_seq_result @cython.boundscheck(False) @cython.cdivision(True) def decode_all(self, np.ndarray[np.double_t, ndim=2] measurements, np.ndarray[np.double_t, ndim=3] last_probs, np.ndarray[np.double_t, ndim=3] curr_probs, np.ndarray[np.uint8_t, ndim=4] back_track): ''' decode the complete searchspace without pruning (Out of Date? - only with pruning was used in the end in Dirks DA) ''' cdef int s_shape_x=self.s_shape_x, s_shape_y=self.s_shape_y, s_shape_z=self.s_shape_z cdef int t_shape_x=self.t_shape_x, t_shape_y=self.t_shape_y, t_shape_z=self.t_shape_z cdef int offset_x=self.offset_x, offset_y=self.offset_y, offset_z=self.offset_z cdef double mu, sq_sigma, x, min_cost, min_transition_cost, emission_cost, cost, prev_cost cdef int sx, sy, sz, tx, ty, tz, len_indices, shape_idx, best_prev_state=0, i, j, k, numaps=measurements.shape[1] cdef int loggging_at = max(1, self.len_measurements / 10) cdef np.ndarray[np.double_t, ndim=5] emission_probs = self.emission_probs cdef np.ndarray[np.double_t, ndim=6] transition_probs = self.transition_probs #~ len_indices = cdef np.ndarray[np.int_t, ndim=2] indices = self.getUnblockedIndices() t = time.time() # xs -> emitted vector for i in range(0, self.len_measurements): if self.verbose and (i % loggging_at == 0 or i == 1 or i == self.len_measurements - 1): _log.debug('calc probs col %s of %s [%dk states/sec] [%s]' % (i, self.len_measurements, (self.numstates * i / 1000) / (time.time() - t + 0.000001), indices.shape[0])) for k in prange(indices.shape[0], nogil=True): sx = indices[k, 0] sy = indices[k, 1] sz = indices[k, 2] # calc emission prob for vector with rss values emission_cost = 0.0 for j in range(numaps): mu = emission_probs[sx, sy, sz, 0, j] sq_sigma = emission_probs[sx, sy, sz, 1, j] x = measurements[i, j] if x == -1: # no measurement continue emission_cost = emission_cost + fabs(mu - x) if i == 0: # first column only emissions curr_probs[sx, sy, sz] = emission_cost continue min_transition_cost = INFINITY shape_idx = -1 with gil: for tx in range(t_shape_x): prev_idx_x = sx + tx - offset_x if not 0 <= prev_idx_x < s_shape_x: shape_idx = shape_idx + t_shape_y * t_shape_z continue for ty in range(t_shape_y): prev_idx_y = sy + ty - offset_y if not 0 <= prev_idx_y < s_shape_y: shape_idx = shape_idx + t_shape_z continue for tz in range(t_shape_z): shape_idx = shape_idx + 1 prev_idx_z = sz + tz - offset_z if not 0 <= prev_idx_z < s_shape_z: continue #~ prev_prob = all_probs[i-1, prev_idx_x, prev_idx_y, prev_idx_z] prev_cost = last_probs[prev_idx_x, prev_idx_y, prev_idx_z] cost = prev_cost + transition_probs[sx, sy, sz, tx, ty, tz] if cost < min_transition_cost: min_transition_cost = cost best_prev_state = shape_idx curr_probs[sx, sy, sz] = min_transition_cost + emission_cost back_track[i, sx, sy, sz] = best_prev_state last_probs, curr_probs = curr_probs, last_probs return last_probs def decode(self, np.ndarray[np.double_t, ndim=2] measurements, int prune_to=0, int num_threads=0, full=False, continueDecoding=False): self.history[:] = [] cdef int s_shape_x=self.s_shape_x, s_shape_y=self.s_shape_y, s_shape_z=self.s_shape_z cdef int t_shape_x=self.t_shape_x, t_shape_y=self.t_shape_y, t_shape_z=self.t_shape_z # we have to locate the (previous) state referenced by the transition index # if the transition_shape is (5, 5, 3) then (tx=2, ty2, tz=1) then previous state is the current state cdef int offset_x, offset_y, offset_z self.offset_x = t_shape_x / 2 self.offset_y = t_shape_y / 2 self.offset_z = t_shape_z / 2 # last column in dynamic programming matrix # curr column in dynamic programming matrix cdef np.ndarray[np.double_t, ndim=3] curr_probs = np.empty((s_shape_x, s_shape_y, s_shape_z), dtype=np.double) if not continueDecoding: self.last_probs.fill(INFINITY) # TODO: rename curr_probs to curr_costs curr_probs.fill(INFINITY) # backtracking pointers to find best path through dynamic programming matrix # we store the only the index of transition shape assert t_shape_x * t_shape_y * t_shape_z < 256, 'you have to increase the backpointer array from uint8 to uint16' cdef np.ndarray[np.uint8_t, ndim=4] back_track = np.zeros((len(measurements), s_shape_x, s_shape_y, s_shape_z), dtype=np.uint8) if self.verbose: _log.info('using %.3f GB Memory for backpointer array' % (back_track.size / 1024.0**3)) self.numstates = s_shape_x * s_shape_y * s_shape_z self.len_measurements = len(measurements) if self.verbose: _log.debug('%sk states per col, total: %.1f mio states' % (self.numstates / 1000, self.numstates / 10.0**6 * self.len_measurements)) omp_set_num_threads(num_threads) try: if prune_to == 0: #~ last_probs = self.decode_all(measurements, last_probs, curr_probs, back_track) seq_res, avg_seq_res = [], [] else: seq_res, avg_seq_res = self.decode_with_pruning(measurements, curr_probs, back_track, prune_to, continueDecoding) finally: omp_set_num_threads(0) # find starting point of best backtracking path cdef int i, j, best_last_state_x = 0, best_last_state_y = 0, best_last_state_z = 0 cdef double min_cost = INFINITY, cost cdef SortedXYZList best_estimates if full: best_estimates = SortedXYZList(s_shape_x * s_shape_y * s_shape_z) for sx in range(s_shape_x): for sy in range(s_shape_y): for sz in range(s_shape_z): cost = self.last_probs[sx, sy, sz] if full: best_estimates.insert(-cost, sx, sy, sz) else: if cost < min_cost: best_last_state_x, best_last_state_y, best_last_state_z = sx, sy, sz min_cost = cost if full: a = best_estimates.xyzvAsArray() #~ np.savetxt('r:/out.txt', a, fmt=('%d', '%d', '%d', '%.1f')) self.estimated_paths[:] = [] # follow backtracking links from best path for i in range(-1, - (prune_to if prune_to > 0 else len(a)), -1): estimated_path = [] curr_state = a[i, 0], a[i, 1], a[i, 2] curr_prob = min_cost for j in range(self.len_measurements-1, -1, -1): estimated_path.append((curr_state, 0)) if j > -1: sx, sy, sz = curr_state[0], curr_state[1], curr_state[2] shape_idx = back_track[j, sx, sy, sz] tx = shape_idx / (t_shape_y * t_shape_z) ty = (shape_idx / t_shape_z) % t_shape_y tz = shape_idx % t_shape_z curr_state = sx + tx - self.offset_x, sy + ty - self.offset_y, sz + tz - self.offset_z self.estimated_paths.append(list(reversed(estimated_path))) estimated_path = [e[0] for e in self.estimated_paths[0]] else: if self.verbose: _log.debug('best last state: (%s, %s, %s) with cost %s' % (best_last_state_x, best_last_state_y, best_last_state_z, min_cost)) # follow backtracking links from best path estimated_path = [] curr_state = best_last_state_x, best_last_state_y, best_last_state_z curr_prob = min_cost for j in range(self.len_measurements-1, -1, -1): estimated_path.append(curr_state) if j > -1: sx, sy, sz = curr_state[0], curr_state[1], curr_state[2] shape_idx = back_track[j, sx, sy, sz] tx = shape_idx / (t_shape_y * t_shape_z) ty = (shape_idx / t_shape_z) % t_shape_y tz = shape_idx % t_shape_z curr_state = sx + tx - self.offset_x, sy + ty - self.offset_y, sz + tz - self.offset_z estimated_path = list(reversed(estimated_path)) return estimated_path, seq_res, avg_seq_res cdef class LMSE: cdef public int verbose cdef public int with_history cdef public list history cdef: int s_shape_x, s_shape_y, s_shape_z np.ndarray emission_probs def __init__(self, np.ndarray[np.double_t, ndim=5] emission_probs, verbose=True, with_history=False): self.emission_probs = emission_probs self.s_shape_x = emission_probs.shape[0] self.s_shape_y = emission_probs.shape[1] self.s_shape_z = emission_probs.shape[2] self.verbose = verbose self.with_history = with_history self.history = [] @cython.boundscheck(False) @cython.cdivision(True) def decode(self, np.ndarray[np.double_t, ndim=2] measurements): t = time.time() cdef int s_shape_x=self.s_shape_x, s_shape_y=self.s_shape_y, s_shape_z=self.s_shape_z cdef np.ndarray[np.double_t, ndim=5] emission_probs = self.emission_probs cdef int numaps = len(measurements[0]) # we dont track XYZ but only the particle index on X cdef SortedXYZList top_mse = SortedXYZList(1000) #~ cdef np.ndarray[np.double_t, ndim=2] top_mse_a cdef int sx, sy, sz cdef int i, j cdef double d, sum_d, sqrt_d, min_d, x cdef int best_x, best_y, best_z seq_res = [] for j in range(len(measurements)): with nogil: min_d = INFINITY for sx in range(s_shape_x): for sy in range(s_shape_y): for sz in range(s_shape_z): sum_d = 0 for i in range(numaps): x = measurements[j, i] if x > 0: # either 1, 2 continue d = emission_probs[sx, sy, sz, 0, i] - x sum_d = sum_d + d * d # sqrt does not change maximum #~ sqrt_d = sqrt(sum_d) sqrt_d = sum_d if sqrt_d < min_d: min_d = sqrt_d best_x = sx; best_y = sy; best_z = sz if self.with_history and -sqrt_d > top_mse.minimum: #~ openmp.omp_set_lock(&mylock) # acquire top_mse.insert(-sqrt_d, sx, sy, sz) #~ openmp.omp_unset_lock(&mylock) # release seq_res.append((best_x, best_y, best_z)) if self.with_history: self.history.append(top_mse.xyzvAsArray()) top_mse.clear() return seq_res cdef inline long lround(double d) nogil: return floor(d + 0.5) cdef class ParticleFilter: cdef public int verbose cdef public int with_history cdef public list history cdef: int s_shape_x, s_shape_y, s_shape_z np.ndarray blockedCubes np.ndarray emission_probs np.ndarray sampling_pool def __init__(self, np.ndarray[np.int_t, ndim=3] blockedCubes, np.ndarray[np.double_t, ndim=5] emission_probs, np.ndarray[np.double_t, ndim=2] sampling_pool, verbose=True, with_history=False, ): self.blockedCubes = blockedCubes self.emission_probs = emission_probs self.sampling_pool = sampling_pool self.s_shape_x = emission_probs.shape[0] self.s_shape_y = emission_probs.shape[1] self.s_shape_z = emission_probs.shape[2] self.verbose = verbose self.with_history = with_history self.history = [] @cython.boundscheck(False) @cython.cdivision(True) def decode(self, np.ndarray[np.double_t, ndim=2] measurements, int num_particles, int do_blocking=1, int over_sample_limit=50, int average_over_particles=0, double smooth=1.0, double replace_below=1e-10, int max_replace=100, double sigma=3.0, double turnoff_blocking=1e3): t = time.time() cdef int s_shape_x=self.s_shape_x, s_shape_y=self.s_shape_y, s_shape_z=self.s_shape_z cdef np.ndarray[np.double_t, ndim=5] emission_probs = self.emission_probs cdef np.ndarray[np.int_t, ndim=3] blockedCubes = self.blockedCubes seq_res = [] if average_over_particles > 0: seq_avg_res = [] else: seq_avg_res = None cdef int numaps = len(measurements[0]) cdef int i, j, k, ii #~ cdef np.ndarray[np.double_t, ndim=1] sigmas = np.empty((numaps, ), dtype=np.double) #~ sigmas.fill(9) cdef int with_history = self.with_history cdef SortedXYZList sorted_weights = SortedXYZList(num_particles) self.history[:] = [] # we dont track XYZ but only the particle index on X cdef SortedXYZList top_particles = SortedXYZList(average_over_particles) cdef np.ndarray[np.double_t, ndim=2] top_particles_a cdef np.ndarray[np.int_t, ndim=3] local_block_map = np.zeros((s_shape_x, s_shape_y, s_shape_z), dtype=np.int) cdef np.ndarray[np.uint32_t, ndim=2] back_track_links = np.zeros((len(measurements), num_particles), dtype=np.uint32) cdef np.ndarray[np.uint32_t, ndim=2] back_track_positions = np.zeros((len(measurements), num_particles), dtype=np.uint32) cdef int OVER_ALLOCATE = num_particles * 2 # if more than half the scene is freespace (UN_BLOCKED), this should be enough cdef np.ndarray[np.int_t, ndim=2] particles, new_particles, random_particle_pool random_particle_pool = np.dstack(np.random.randint(0, size, OVER_ALLOCATE) for size in (s_shape_x, s_shape_y, s_shape_z))[0] cdef int random_particle_pool_idx = 0, num_replaced = 0 particles = random_particle_pool.copy() new_particles = np.zeros((num_particles, 3), dtype=np.int) j = 0 for i in range(OVER_ALLOCATE): if blockedCubes[random_particle_pool[i, 0], random_particle_pool[i, 1], random_particle_pool[i, 2]] != UN_BLOCKED: continue particles[j, 0] = random_particle_pool[i, 0] particles[j, 1] = random_particle_pool[i, 1] particles[j, 2] = random_particle_pool[i, 2] j += 1 if j == num_particles: break cdef np.ndarray[np.double_t, ndim=1] weights = np.zeros((num_particles, ), dtype=np.double) cdef np.ndarray[np.double_t, ndim=2] sampling_pool = self.sampling_pool cdef int pool_size = len(sampling_pool), curr_pool_idx = 0, sampling_pool_idx = 0, over_sample_limit_reached = 0 cdef int total_spawned=0, idx, num_spawned, num_spawn_particles cdef int px, py, pz, best_idx, best_px, best_py, best_pz, new_px, new_py, new_pz cdef int blocked_path, p_back_track_idx, curr_x_i, curr_y_i, curr_z_i, dxi, dyi, dzi cdef double sum_w_new, w_new, mu, sq_sigma, x, dx, dy, dz, w cdef double step_x, step_y, step_z, curr_x, curr_y, curr_z, distance cdef double gauss_C1 = sqrt(2 * math.pi * sigma**2) cdef double gauss_C2 = -(2 * sigma**2) for j in range(len(measurements)): if self.verbose and (j % 10 == 0 or j == len(measurements) - 1): _log.debug('m: %s of %s' % (j, len(measurements))) with nogil: w_max = NEG_INFINITY sum_w_new = 0 #~ idx = random_particle_pool_idx num_replaced = 0 for k in range(num_particles): px = particles[k, 0] py = particles[k, 1] pz = particles[k, 2] if px == -1: continue w_new = 1 for i in range(numaps): x = measurements[j, i] if x > 0: # either 1, 2 continue mu = emission_probs[px, py, pz, 0, i] w_new = w_new * gauss_C1 * exp((x - mu) * (x - mu) / gauss_C2) if w_new == 1: # mo measurements at this position w_new = 0 # hmpf, ugly if w_new < replace_below and num_replaced < max_replace: # replace low prob sample with with random from pool # this will prevent "starving" since new random particels # are permanently fed into the system i = random_particle_pool_idx % OVER_ALLOCATE random_particle_pool_idx += 1 num_replaced += 1 px = particles[k, 0] = random_particle_pool[i, 0] py = particles[k, 1] = random_particle_pool[i, 1] pz = particles[k, 2] = random_particle_pool[i, 2] w_new = 1 for i in range(numaps): x = measurements[j, i] if x > 0: # either 1, 2 continue mu = emission_probs[px, py, pz, 0, i] w_new = w_new * gauss_C1 * exp((x - mu) * (x - mu) / gauss_C2) if w_new == 1: w_new = 0 # hmpf, ugly #XXX: how can we formalize this sqrt weights[k] = powf(w_new, smooth) #~ weights[k] = w_new sum_w_new = sum_w_new + weights[k] if average_over_particles > 0: top_particles.insert(w_new, px, py, pz) if with_history: sorted_weights.insert(w_new, px, py, pz) if w_new > w_max: w_max = w_new best_px = px best_py = py best_pz = pz best_idx = k #~ print random_particle_pool_idx -idx #~ print sum_w_new #~ w_max = 10**9 #~ sum_w_new = 0 #~ for k in range(num_particles): #~ px = particles[k, 0] #~ py = particles[k, 1] #~ pz = particles[k, 2] #~ if px == -1: #~ continue #~ w_new = 0 #~ for i in range(numaps): #~ x = measurements[j, i] #~ if x == 2: #~ continue #~ elif x == 1: # no measurement #~ #if mu > -100: #~ #w_new = w_new + 4 * 4 #~ continue #~ mu = emission_probs[px, py, pz, 0, i] #~ w_new = w_new + (x - mu) * (x - mu) #~ w_new = sqrt(w_new) #~ if w_new == 0: #~ w_new = INFINITY #~ weights[k] = 1.0 / w_new #~ sum_w_new = sum_w_new + weights[k] #~ if average_over_particles > 0: #~ top_particles.insert(-w_new, px, py, pz) #~ if with_history: #~ sorted_weights.insert(-w_new, px, py, pz) #~ if w_new < w_max: #~ w_max = w_new #~ best_px = px #~ best_py = py #~ best_pz = pz #~ best_idx = k #~ print w_max # renormalize for k in range(num_particles): weights[k] = weights[k] / sum_w_new total_spawned = 0 for k in range(num_particles): w = weights[k] px = particles[k, 0] py = particles[k, 1] pz = particles[k, 2] num_spawn_particles = lround(num_particles * w) # prevent overflow, due to rounding if total_spawned + num_spawn_particles > num_particles: num_spawn_particles = num_particles - total_spawned idx = total_spawned num_spawned = 0 for i in range(num_spawn_particles * over_sample_limit): # limit retries if num_spawned == num_spawn_particles: break curr_pool_idx = sampling_pool_idx % pool_size dx = sampling_pool[curr_pool_idx, 0] dy = sampling_pool[curr_pool_idx, 1] dz = sampling_pool[curr_pool_idx, 2] sampling_pool_idx += 1 dxi = lround(dx) dyi = lround(dy) dzi = lround(dz) new_px = px + dxi new_py = py + dyi new_pz = pz + dzi # check if we are still in the allowed space if not (0 <= new_px < s_shape_x and 0 <= new_py < s_shape_y and 0 <= new_pz < s_shape_z): continue # try again # check if if we know that the new_p cannot be reached if local_block_map[new_px, new_py, new_pz] == k: continue # try again # check if destination is blocked if blockedCubes[new_px, new_py, new_pz] != UN_BLOCKED: continue # try again if do_blocking == 1 and sum_w_new > turnoff_blocking: #~ print # scan space between p and new_p for BLOCKED cubes # and reject sample if True distance = sqrt(dxi * dxi + dyi * dyi + dzi * dzi) # euclidian if distance == 0: continue step_x = dxi / distance step_y = dyi / distance step_z = dzi / distance curr_x = px curr_y = py curr_z = pz curr_x_i = int(curr_x); curr_y_i = int(curr_y); curr_z_i = int(curr_z) blocked_path = 0 for ii in range(distance): #~ while not(curr_x_i == new_px or curr_y_i == new_py or curr_z_i == new_pz): curr_x = curr_x + step_x curr_y = curr_y + step_y curr_z = curr_z + step_z curr_x_i = int(curr_x); curr_y_i = int(curr_y); curr_z_i = int(curr_z) #~ print curr_x, curr_y, curr_z, ':', dx, dy, dz, ':', px, py, pz if 0 <= curr_x_i < s_shape_x and 0 <= curr_y_i < s_shape_y and 0 <= curr_z_i < s_shape_z: if blockedCubes[curr_x_i, curr_y_i, curr_z_i] != UN_BLOCKED: blocked_path = 1 #mark es blocked with current block indicator local_block_map[curr_x_i, curr_y_i, curr_z_i] = k break if blocked_path == 1: # mark all cubes after the blocked hit also with current block indicator for ii in range(ii+1, distance): curr_x = curr_x + step_x curr_y = curr_y + step_y curr_z = curr_z + step_z curr_x_i = int(curr_x); curr_y_i = int(curr_y); curr_z_i = int(curr_z) if 0 <= curr_x_i < s_shape_x and 0 <= curr_y_i < s_shape_y and 0 <= curr_z_i < s_shape_z: local_block_map[curr_x_i, curr_y_i, curr_z_i] = k #~ print 'blocked..., resample' continue # try again # ok, we got a valid sample new_particles[idx, 0] = new_px new_particles[idx, 1] = new_py new_particles[idx, 2] = new_pz # link to predessor back_track_links[j, idx] = k # current encoded position back_track_positions[j, idx] = new_px * s_shape_y * s_shape_z + new_py * s_shape_z + new_pz idx += 1 num_spawned += 1 total_spawned += num_spawn_particles if i == num_spawn_particles * over_sample_limit - 1: over_sample_limit_reached += 1 for i in range(total_spawned, num_particles): new_particles[i, 0] = -1 # END nogil seq_res.append((best_px, best_py, best_pz)) if with_history: self.history.append(sorted_weights.xyzvAsArray()) sorted_weights.clear() #~ print best_px, best_py, best_pz if average_over_particles > 0: top_particles_a, sum_w, sum_x, sum_y, sum_z = top_particles.particleDataAsArray() #~ for i in range(len(top_particles_a[0])): #~ print top_particles_a[0, i], #~ print top_particles_a[1, i], #~ print top_particles_a[2, i], #~ print top_particles_a[3, i] #~ print len(top_particles_a[0]), sum_w, sum_x, sum_y, sum_z tpx, tpy, tpz = (int(top_particles_a[0, -1]), int(top_particles_a[1, -1]), int(top_particles_a[2, -1])) assert seq_res[-1] == (tpx, tpy, tpz), '%s != %s' % (seq_res[-1], (tpx, tpy, tpz)) k = len(top_particles_a[0]) seq_avg_res.append((int(sum_x / k), int(sum_y / k), int(sum_z / k))) # toggle new_particles, particles = particles, new_particles # reset new_particles.fill(0) weights.fill(0) local_block_map.fill(0) top_particles.clear() back_tracked_res = [] curr_idx = best_idx curr_px, curr_py, curr_pz = seq_res[-1] for j in range(len(measurements)-1, -1, -1): position = back_track_positions[j, curr_idx] curr_px = position / (s_shape_y * s_shape_z) curr_py = (position / s_shape_z) % s_shape_y curr_pz = position % s_shape_z back_tracked_res.append((curr_px, curr_py, curr_pz)) curr_idx = back_track_links[j, curr_idx] if self.verbose: needed_samples = num_particles * len(measurements) _log.info('sampling stats: poolaccess: %.2e, rejected: %.2e, reject factor: %.1f, oversample limit reached: %s' % (sampling_pool_idx, sampling_pool_idx - needed_samples, (sampling_pool_idx - needed_samples) / needed_samples, over_sample_limit_reached) ) _log.info('duration: %.3f secs' % (time.time() -t )) back_tracked_res = list(reversed(back_tracked_res)) return back_tracked_res, seq_res, seq_avg_res @cython.boundscheck(False) def uncube(np.ndarray[np.double_t, ndim=3] data, orgshape): cdef np.ndarray[np.double_t, ndim=3] result cdef int size_x = orgshape[0] cdef int size_y = orgshape[1] cdef int size_z = orgshape[2] result = np.zeros((size_x, size_y, size_z), dtype=np.double) cdef int x, y, z, _x, _y, _z cdef int cube_width = orgshape[0] / data.shape[0] for x in range(data.shape[0]): for y in range(data.shape[1]): for z in range(data.shape[2]): for _x in range(x * cube_width, x * cube_width + cube_width): for _y in range(y * cube_width, y * cube_width + cube_width): for _z in range(z * cube_width, z * cube_width + cube_width): result[_x, _y, _z] = data[x, y, z] return result def normalize(x, deviceAdaptionValues, adaptMin=-150.0, isInDB=True): '''this is slow of course''' if isInDB: x = 10**(x / 10.0) return toNormalizedDecibelwithDeviceAdaption(np.array([[[x]]]).astype(np.float32), deviceAdaptionValues, adaptMin)[0][0][0] @cython.boundscheck(False) @cython.cdivision(True) def compareForDeviceAdaption(da_values, np.ndarray[np.float32_t, ndim=3] estimated, station2apid_locid2measurement): cdef int i, j cdef double delta, total = 0 cdef int num_deltas = 0 cdef np.ndarray[np.double_t, ndim=3] normalized_estimated cdef np.ndarray[np.double_t, ndim=2] measured for station in da_values: normalized_estimated = toNormalizedDecibelwithDeviceAdaption(estimated, da_values[station]) measured = station2apid_locid2measurement[station] with nogil: for i in range(normalized_estimated.shape[0]): for j in range(normalized_estimated.shape[1]): delta = fabs(normalized_estimated[i, j, 0] - measured[i, j]) # delta != delta -> isnan() check if delta != delta or delta > 100: continue if not normalized_estimated[i, j, 0] > -100: continue total = total + delta num_deltas = num_deltas + 1 return total / num_deltas @cython.boundscheck(False) @cython.cdivision(True) def toNormalizedDecibelwithDeviceAdaption(np.ndarray[np.float32_t, ndim=3] data, deviceAdaptionValues, double adaptMin=-150.0): cdef int x, y, z, i cdef int shape_x=data.shape[0], shape_y=data.shape[1], shape_z=data.shape[2] cdef double v, left_intv, right_intv, left_val, right_val, m cdef int dacount = len(deviceAdaptionValues) assert dacount < 500 cdef double[500] daintervals cdef double[500] davalues deviceAdaptionValues = [(adaptMin, -100 - adaptMin)] + deviceAdaptionValues for i, (dai, dav) in enumerate(deviceAdaptionValues): daintervals[i] = dai davalues[i] = dav cdef np.ndarray[np.double_t, ndim=3] _data = np.zeros((shape_x, shape_y, shape_z), dtype=np.double) for x in prange(shape_x, nogil=True): for y in range(shape_y): for z in range(shape_z): v = data[x, y, z] # convert to db if v > 0: v = 10 * log10(v) if v < adaptMin: v = -100 else: #apply deviceAdaptation with gil: for i in range(dacount): left_intv = daintervals[i] right_intv = daintervals[i+1] left_val = davalues[i] right_val = davalues[i+1] if left_intv <= v < right_intv: # it would be easy to cache m as well diff1 = right_intv - left_intv diff2 = right_intv + right_val - (left_intv + left_val) m = diff2 / diff1 v = left_intv + left_val + (v - left_intv) * m break else: v = -100.0 _data[x, y, z] = v return _data def findpath(x, y, z, level): #~ print '--- !', x, y, z res = [] for i, j, k in product([-1, 0, 1], [-1, 0, 1], [-1, 0, 1]): #~ print i, j, k if x + i == 0 and y + j == 0 and z + k == 0: return [[(x,y,z)]] elif level > 0: children = findpath(x + i, y + j, z + k, level-1) for l in children: l = list(l) l.insert(0, (x, y, z)) res.append(l) return res @cython.boundscheck(False) @cython.cdivision(True) def buildTransitionProbs(cubed_data_shape, transition_shape, np.ndarray[np.int_t, ndim=3] blockedCubes, int cubewidth, int freespace_scan): cdef int s_shape_x, s_shape_y, s_shape_z, t_shape_x, t_shape_y, t_shape_z s_shape_x, s_shape_y, s_shape_z = cubed_data_shape t_shape_x, t_shape_y, t_shape_z = transition_shape assert (t_shape_x, t_shape_y, t_shape_z) in {(3,3,1), (3,3,3), (5,5,3)}, 'unsuported transition_shape %s' % transition_shape cdef np.ndarray[np.double_t, ndim=6] transition_probs = np.zeros((s_shape_x, s_shape_y, s_shape_z, t_shape_x, t_shape_y, t_shape_z), dtype=np.double) # the combined transition probs leading TO this cube #~ cdef np.ndarray[np.double_t, ndim=3] combined_transition_probs = np.zeros((s_shape_x, s_shape_y, s_shape_z), dtype=np.double) # distribute probability evenly over all states s with sum(p(s) for s in S) = 1.0 cdef double p_s = 1.0 / (s_shape_x * s_shape_y * s_shape_z) cdef double cost_p_s = -log(1.0 / (s_shape_x * s_shape_y * s_shape_z)) #~ cdef double p_s_s cdef int sx, sy, sz, tx, ty, tz, numblocked, numunblocked, curr_x, curr_y, curr_z #~ cdef double cdef int freespace, shape_idx, max_freespace_for_transition max_freespace = 0 # 20 highest res cubes - 4 meter if resolution is 0.2m cdef double renormalize_target, renormalize_ratio, renormalize_sum # debugging stuff cdef int prev_idx_x, prev_idx_y, prev_idx_z cdef int offset_x = t_shape_x / 2 cdef int offset_y = t_shape_y / 2 cdef int offset_z = t_shape_z / 2 # for (5, 5, 3) model MAX_PATH_LENGTH = 1 MAX_PATHS = 9 cdef int unblocked_path_found = 0, x, y, z, i cdef np.ndarray[np.int_t, ndim=6] path_to_center = np.zeros((t_shape_x, t_shape_y, t_shape_z, MAX_PATHS, MAX_PATH_LENGTH, 3), dtype=np.int) cdef np.ndarray[np.int_t, ndim=1] numblocked_on_level = np.zeros((3), dtype=np.int) cdef np.ndarray[np.double_t, ndim=1] p_s_s_on_level = np.zeros((3), dtype=np.double) cdef np.ndarray[np.int_t, ndim=1] freespace_for_transition = np.zeros((t_shape_x * t_shape_y * t_shape_z), dtype=np.int) for tx in range(t_shape_x): for ty in range(t_shape_y): for tz in range(t_shape_z): if tx == 0 or tx == 4 or ty == 0 or ty == 4: all_p = findpath(tx-offset_x, ty-offset_y, tz-offset_z, MAX_PATH_LENGTH) for i, p in enumerate(all_p): for j, (x, y, z) in enumerate(p[1:]): path_to_center[tx, ty, tz, i, j, 0] = x + offset_x path_to_center[tx, ty, tz, i, j, 1] = y + offset_y path_to_center[tx, ty, tz, i, j, 2] = z + offset_z cdef int level0_cubes = 1 cdef int level1_cubes = 27 - level0_cubes cdef int level2_cubes = (5 * 5 * 3) - level1_cubes - level0_cubes for sx in range(s_shape_x): for sy in range(s_shape_y): for sz in range(s_shape_z): numblocked = 0 for tx in range(t_shape_x): prev_idx_x = sx + tx - offset_x if not 0 <= prev_idx_x < s_shape_x: numblocked = numblocked + t_shape_y * t_shape_z continue for ty in range(t_shape_y): prev_idx_y = sy + ty - offset_y if not 0 <= prev_idx_y < s_shape_y: numblocked = numblocked + t_shape_z continue for tz in range(t_shape_z): prev_idx_z = sz + tz - offset_z if not 0 <= prev_idx_z < s_shape_z: numblocked += 1 continue if blockedCubes[prev_idx_x, prev_idx_y, prev_idx_z] != UN_BLOCKED: # from cube is directly blocked transition_probs[sx, sy, sz, tx, ty, tz] = INFINITY numblocked += 1 elif blockedCubes[sx, sy, sz] != UN_BLOCKED: # current cube is blocked transition_probs[sx, sy, sz, tx, ty, tz] = INFINITY numblocked += 1 elif (t_shape_x == 5 and t_shape_y == 5) and (tx == 0 or tx == 4 or ty == 0 or ty == 4): unblocked_path_found = 0 for i in range(MAX_PATH_LENGTH): x = path_to_center[tx, ty, tz, i, 0, 0] y = path_to_center[tx, ty, tz, i, 0, 1] z = path_to_center[tx, ty, tz, i, 0, 2] if x == 0 and y == 0 and z == 0: continue x = x - offset_x y = y - offset_y z = z - offset_z if blockedCubes[sx + x, sy + y, sz + z] == UN_BLOCKED: unblocked_path_found = 1 break if unblocked_path_found == 0: transition_probs[sx, sy, sz, tx, ty, tz] = INFINITY numblocked += 1 if freespace_scan == -1: # mode blocked cubes infinity and unblocked cubes zero costs continue # elif == 0: no forward scan, constant costs 1/3 1/3 1/3 costs for each hull level numblocked_on_level[0] = 0 numblocked_on_level[1] = 0 numblocked_on_level[2] = 0 #~ if numblocked < t_shape_x * t_shape_y * t_shape_z : #~ p_s_s = -log(p_s / (t_shape_x * t_shape_y * t_shape_z - numblocked)) #~ else: #~ p_s_s = INFINITY for tx in range(t_shape_x): for ty in range(t_shape_y): for tz in range(t_shape_z): #~ if transition_probs[sx, sy, sz, tx, ty, tz] == 0: #~ transition_probs[sx, sy, sz, tx, ty, tz] = p_s_s if abs(tx - offset_x) == 2 or abs(ty - offset_y) == 2: if transition_probs[sx, sy, sz, tx, ty, tz] == INFINITY: numblocked_on_level[2] += 1 elif abs(tx - offset_x) == 1 or abs(ty - offset_y) == 1: if transition_probs[sx, sy, sz, tx, ty, tz] == INFINITY: numblocked_on_level[1] += 1 else: if transition_probs[sx, sy, sz, tx, ty, tz] == INFINITY: numblocked_on_level[0] += 1 if numblocked < t_shape_x * t_shape_y * t_shape_z : #~ p_s_s_on_level[0] = -log(p_s / level0_cubes / 3.0) #~ p_s_s_on_level[1] = -log(p_s / (level1_cubes - numblocked_on_level[1]) / 3.0) #~ p_s_s_on_level[2] = -log(p_s / (level2_cubes - numblocked_on_level[2]) / 3.0) p_s_s_on_level[0] = -log(p_s / level0_cubes / 3.0) # numblocked_on_level[0] is handled by local INFINITY check if numblocked_on_level[0] == 1: # move probability mass of first level (add all mass from zero level) # double p, half costs p_s_s_on_level[1] = -log((p_s + p_s) / (level1_cubes - numblocked_on_level[1]) / 3.0) else: p_s_s_on_level[1] = -log(p_s / (level1_cubes - numblocked_on_level[1]) / 3.0) p_s_s_on_level[2] = -log(p_s / (level2_cubes - numblocked_on_level[2]) / 3.0) #~ print level1_cubes, numblocked_on_level[1], level2_cubes, numblocked_on_level[2] #~ print p_s_s_on_level[0], p_s_s_on_level[1], p_s_s_on_level[2] #~ p_s_s_on_level[0] = 0#-log((1.0 / 3) * p_s / level0_cubes) #~ p_s_s_on_level[1] = 0#-log((1.0 / 3) * p_s / (level1_cubes - numblocked_on_level[1])) #~ p_s_s_on_level[2] = 0#-log((1.0 / 3) * p_s / (level2_cubes - numblocked_on_level[2])) # find max freespace and normalize in the next step by that maximum shape_idx = 0 max_freespace_for_transition = 0 for tx in range(t_shape_x): for ty in range(t_shape_y): for tz in range(t_shape_z): # forward scan to determine number of unblocked cubes ahead (in direction of jump) to modify cost/p_s_s delta_x = tx - offset_x delta_y = ty - offset_y delta_z = tz - offset_z if delta_x != 0 or delta_y != 0 or delta_z != 0: numunblocked = 0 curr_x = sx curr_y = sy curr_z = sz while 0 <= curr_x < s_shape_x and 0 <= curr_y < s_shape_y and 0 <= curr_z < s_shape_z and numunblocked < freespace_scan / cubewidth: if blockedCubes[curr_x, curr_y, curr_z] == UN_BLOCKED: numunblocked = numunblocked + 1 curr_x = curr_x - delta_x curr_y = curr_y - delta_y # if we dont manipulate the z-delta handling jumping in z-order # will be very unlikely as there is normally no free space if abs(delta_x) > 0 or abs(delta_y) > 0: pass # do not touch z else: curr_z = curr_z - delta_z else: break freespace = numunblocked * cubewidth #~ if abs(delta_z) > 0: #~ freespace = freespace * 3 if freespace > max_freespace_for_transition: max_freespace_for_transition = freespace freespace_for_transition[shape_idx] = freespace shape_idx = shape_idx + 1 shape_idx = 0 renormalize_target = 0 cost_p_s_1 = 0 for tx in range(t_shape_x): for ty in range(t_shape_y): for tz in range(t_shape_z): delta_x = tx - offset_x delta_y = ty - offset_y delta_z = tz - offset_z freespace = freespace_for_transition[shape_idx] if abs(delta_x) == 2 or abs(delta_y) == 2: if transition_probs[sx, sy, sz, tx, ty, tz] != INFINITY: transition_probs[sx, sy, sz, tx, ty, tz] = p_s_s_on_level[2] * (max_freespace_for_transition - freespace + 1) cost_p_s_1 = cost_p_s_1 + p_s_s_on_level[2] elif abs(delta_x) == 1 or abs(delta_y) == 1 or abs(delta_z) == 1: if transition_probs[sx, sy, sz, tx, ty, tz] != INFINITY: transition_probs[sx, sy, sz, tx, ty, tz] = p_s_s_on_level[1] * (max_freespace_for_transition - freespace + 1) #* (1.0 - freespace / max_freespace) cost_p_s_1 = cost_p_s_1 + p_s_s_on_level[1] else: if transition_probs[sx, sy, sz, tx, ty, tz] != INFINITY: transition_probs[sx, sy, sz, tx, ty, tz] = p_s_s_on_level[0] * 3 cost_p_s_1 = cost_p_s_1 + p_s_s_on_level[0] #~ if transition_probs[sx, sy, sz, tx, ty, tz] != INFINITY: #~ print delta_x, delta_y, delta_z, transition_probs[sx, sy, sz, tx, ty, tz] if transition_probs[sx, sy, sz, tx, ty, tz] != INFINITY: renormalize_target = renormalize_target + transition_probs[sx, sy, sz, tx, ty, tz] shape_idx = shape_idx + 1 renormalize_ratio = cost_p_s_1 / renormalize_target renormalize_sum = 0 for tx in range(t_shape_x): for ty in range(t_shape_y): for tz in range(t_shape_z): if transition_probs[sx, sy, sz, tx, ty, tz] != INFINITY: transition_probs[sx, sy, sz, tx, ty, tz] = transition_probs[sx, sy, sz, tx, ty, tz] * renormalize_ratio #~ if transition_probs[sx, sy, sz, tx, ty, tz] != INFINITY: #~ renormalize_sum = renormalize_sum + transition_probs[sx, sy, sz, tx, ty, tz] #~ assert renormalize_sum == 0 or (renormalize_sum - 0.1 < cost_p_s < renormalize_sum + 0.1), '%s %s' % (cost_p_s, renormalize_sum) return transition_probs @cython.boundscheck(False) @cython.cdivision(True) def buildStatesWithEmissionProbs(data_shape, apdata_list, int cube_width=3, double default_sq_variance=1, pooled_variance=0): ''' build states from accesspoint volume images (apvis should have equal dimensions) @param apdata: list of accesspoints volumeimages @param cube_width: size of cube for state ''' assert len(apdata_list) > 0 # number of voxels in a state cdef int cube_count = cube_width * cube_width * cube_width cdef int vidi_x = data_shape[0], vidi_y = data_shape[1], vidi_z = data_shape[2] cdef int size_x = vidi_x / cube_width, size_y = vidi_y / cube_width, size_z = vidi_z / cube_width # first 3 dims x, y, z in state-space # dim 4 (mean, variance) # dim 5 apidx cdef np.ndarray[np.double_t, ndim=5] states states = np.zeros((size_x, size_y, size_z, 2, len(apdata_list)), dtype=np.double) cdef np.ndarray[np.double_t, ndim=3] vidata = np.zeros((vidi_x, vidi_y, vidi_z), dtype=np.double) # in state space cdef int sx, sy, sz # in volume image space cdef int vx, vy, vz cdef int bound_x1, bound_y1, bound_z1, bound_x2, bound_y2, bound_z2 # auxilary vars cdef int apidx # accesspoint index cdef double s, delta, mean, squared_variance for apidx, vidata in enumerate(apdata_list): for sx in range(size_x): bound_x1 = cube_width * sx bound_x2 = bound_x1 + cube_width if (bound_x2 >= vidi_x): # degenerated boundary cube - will contain less voxels # meaybe it would be better to sort them out bound_x2 = vidi_x for sy in range(size_y): bound_y1 = cube_width * sy bound_y2 = bound_y1 + cube_width if (bound_y2 >= vidi_y): # degenerated boundary cube bound_y2 = vidi_y for sz in range(size_z): bound_z1 = cube_width * sz bound_z2 = bound_z1 + cube_width if (bound_z2 >= vidi_z): # degenerated boundary cube bound_z2 = vidi_z # traverse cube coordinates in volume image #to calc mean s = 0 for vx in range(bound_x1, bound_x2): for vy in range(bound_y1, bound_y2): for vz in range(bound_z1, bound_z2): s += vidata[vx, vy, vz] mean = s / cube_count states[sx, sy, sz, 0, apidx] = mean if pooled_variance > 0: states[sx, sy, sz, 1, apidx] = pooled_variance else: # now calculate variance s = 0 for vx in range(bound_x1, bound_x2): for vy in range(bound_y1, bound_y2): for vz in range(bound_z1, bound_z2): delta = mean - vidata[vx, vy, vz] s += delta * delta # store squared variance squared_variance = s / cube_count if squared_variance == 0: # fully flat voxel data - will lead to singular gaussian squared_variance = default_sq_variance states[sx, sy, sz, 1, apidx] = squared_variance return states @cython.boundscheck(False) @cython.cdivision(True) def buildTriangleCubeIntersection(np.ndarray[np.float_t, ndim=3] bl_triangles, np.ndarray[np.float_t, ndim=3] unpass_triangles, np.ndarray[np.float_t, ndim=3] unbl_triangles, cubed_data_shape, int cube_width, vi_dimensions): di = vi_dimensions # do not confuse disx and sx # disx is the x-width of a voxel in the volume image dimension and sx is the x-loop var for the state space (the cubes) cdef float disx=di.sx, disy=di.sy, disz=di.sz, ditx=di.tx, dity=di.ty, ditz=di.tz cdef int size_x, size_y, size_z size_x, size_y, size_z = cubed_data_shape[0], cubed_data_shape[1], cubed_data_shape[2] cdef np.ndarray[np.int_t, ndim=3] blockedCubes = np.zeros((size_x, size_y, size_z), dtype=np.int) cdef int sx, sy, sz # in state space cdef int vx, vy, vz # in volume image space (raytracer space) cdef float x, y, z # in real world space cdef float real_cube_half_width_x = (cube_width * disx) / 2.0 cdef float real_cube_half_width_y = (cube_width * disy) / 2.0 cdef float real_cube_half_width_z = (cube_width * disz) / 2.0 cdef int i, len_triangles, set_to_val cdef float bc0, bc1, bc2, bhs0, bhs1, bhs2, tv0, tv1, tv2, tv3, tv4, tv5, tv6, tv7, tv8, xx bhs0 = real_cube_half_width_x bhs1 = real_cube_half_width_y bhs2 = real_cube_half_width_z cdef np.ndarray[np.float_t, ndim=3] triangles for triangles, set_to_val in ((bl_triangles, BLOCKED), (unpass_triangles, UNPASSABLE), (unbl_triangles, UN_BLOCKED)): len_triangles = len(triangles) for i in prange(len_triangles, nogil=True): tv0 = triangles[i, 0, 0] tv1 = triangles[i, 0, 1] tv2 = triangles[i, 0, 2] tv3 = triangles[i, 1, 0] tv4 = triangles[i, 1, 1] tv5 = triangles[i, 1, 2] tv6 = triangles[i, 2, 0] tv7 = triangles[i, 2, 1] tv8 = triangles[i, 2, 2] for sx in range(size_x): vx = sx * cube_width x = vx * disx + ditx bc0 = x + real_cube_half_width_x for sy in range(size_y): vy = sy * cube_width y = vy * disy + dity bc1 = y + real_cube_half_width_y for sz in range(size_z): vz = sz * cube_width z = vz * disz + ditz bc2 = z + real_cube_half_width_z if triBoxOverlap(bc0, bc1, bc2, bhs0, bhs1, bhs2, tv0, tv1, tv2, tv3, tv4, tv5, tv6, tv7, tv8): blockedCubes[sx, sy, sz] = set_to_val return blockedCubes @cython.boundscheck(False) @cython.cdivision(True) def evalUnreachableAirCubes(np.ndarray[np.int_t, ndim=3] blockedCubes, cube_height, max_reachable_height): cdef int size_x, size_y, size_z size_x, size_y, size_z = blockedCubes.shape[0], blockedCubes.shape[1], blockedCubes.shape[2] cdef int max_cubes_above = max_reachable_height / cube_height cdef int sx, sy, sz, zi # in state space for sz in range(size_z): for sx in range(size_x): for sy in range(size_y): # find a currently unblocked/unmarked element if blockedCubes[sx, sy, sz] == UN_BLOCKED: # scan in z+ direction and mark everything > max_cubes_above as unreachable # stop if next blocking element hit, probably the floor above us zi = sz while zi < size_z and blockedCubes[sx, sy, zi] == UN_BLOCKED: if zi - sz > max_cubes_above: blockedCubes[sx, sy, zi] = AIR # mark as ureachable air cube zi = zi + 1 elif blockedCubes[sx, sy, sz] == UNPASSABLE: # scan in z+ direction and if a UN_BLOCKED chain is starting - mark every UN_BLOCKED to UNPASSABLE # stop if next blocking element hit, probably the floor above us zi = sz + 1 while zi < size_z and blockedCubes[sx, sy, zi] == UN_BLOCKED: blockedCubes[sx, sy, zi] = UNPASSABLE # mark as unpassable air cube zi = zi + 1 @cython.boundscheck(False) def viewPruningHistory(histories, idx, orgshape, int cubewidth, count, int view_values=1, int raw_values=0): cdef np.ndarray[np.double_t, ndim=2] history = histories[idx] cdef np.ndarray[np.double_t, ndim=3] result cdef int size_x = orgshape[0] cdef int size_y = orgshape[1] cdef int size_z = orgshape[2] result = np.zeros((size_x, size_y, size_z), dtype=np.double) if count > history.shape[0]: count = history.shape[0] - 1 cdef double cost, min_cost = INFINITY cdef int i for i in range(history.shape[0] - count, history.shape[0]): cost = history[i, 3] if cost < min_cost: min_cost = cost cdef int x, y, z, j1, j2, j3, start, end start = history.shape[0] - count end = history.shape[0] for i in prange(start, end, nogil=True): x = history[i, 0] * cubewidth y = history[i, 1] * cubewidth z = history[i, 2] * cubewidth cost = history[i, 3] for j1 in range(cubewidth): if x + j1 >= size_x: continue for j2 in range(cubewidth): if y + j2 >= size_y: continue for j3 in range(cubewidth): if z + j3 >= size_z: continue if view_values: if raw_values: result[x + j1, y + j2, z + j3] = -min_cost else: result[x + j1, y + j2, z + j3] = cost - min_cost else: # view positions result[x + j1, y + j2, z + j3] = i - start return result