Commit c8f52100 authored by maali

Add predict_pose script

parent 3e3382b7
import tensorflow as tf
import numpy as np
import random
import math
import statistics
import os
import data
import models
import cv2
from scipy.spatial.transform import Rotation as R
import argparse


def dictToArray(hypDict):  # convert a dictionary of mean keypoint coordinates into an (N, 2) array
    coordArray = np.zeros((len(hypDict.keys()), 2))
    for key, hyps in hypDict.items():
        coordArray[key] = np.array([round(hyps[1]), round(hyps[0])])  # stored (y, x), returned (x, y)
    return coordArray
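
# A quick sanity check (hypothetical values): dictToArray({0: [3.2, 7.8]})
# returns array([[8., 3.]]) -- the (y, x) mean [3.2, 7.8] becomes a rounded (x, y) row.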


def ransacVal(y1, x1, v2):  # dot product of unit vectors, i.e. the cosine of the angle between them
    v2 = v2 / np.linalg.norm(v2)
    return y1 * v2[1] + x1 * v2[0]
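
# e.g. ransacVal(0.6, 0.8, np.array([0.8, 0.6])) returns 1.0 (identical directions),
# while ransacVal(1.0, 0.0, np.array([1.0, 0.0])) returns 0.0 (perpendicular directions).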


def determineOutlier(coord, yMean, yDev, xMean, xDev):  # True if (y, x) lies outside the deviation bounds
    return abs(coord[0] - yMean) > yDev or abs(coord[1] - xMean) > xDev
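
# e.g. determineOutlier((10.0, 5.0), yMean=4.0, yDev=2.0, xMean=5.0, xDev=2.0)
# returns True, since the y coordinate lies 6 away from the mean against a bound of 2.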


def pruneHypsStdDev(hypDict, m=2):  # prune hypotheses more than m standard deviations from the mean
    for key, hyps in hypDict.items():
        if not hyps:  # guard added: statistics.mean raises on an empty list
            continue
        yVals, xVals = [x[0][0] for x in hyps], [x[0][1] for x in hyps]
        yMean, xMean = statistics.mean(yVals), statistics.mean(xVals)
        yDev, xDev = statistics.pstdev(yVals) * m, statistics.pstdev(xVals) * m
        hypDict[key] = [x for x in hyps if not determineOutlier(x[0], yMean, yDev, xMean, xDev)]


def getMean(hypDict):  # weighted average of each keypoint's hypothesis coordinates
    meanDict = {}
    for key, hyps in hypDict.items():
        xMean = 0
        yMean = 0
        totalWeight = 0
        for hyp in hyps:  # hyp is ((y, x), weight)
            yMean += hyp[0][0] * hyp[1]
            xMean += hyp[0][1] * hyp[1]
            totalWeight += hyp[1]
        yMean /= totalWeight
        xMean /= totalWeight
        meanDict[key] = [yMean, xMean]
    return meanDict
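
# Worked example (hypothetical values): getMean({0: [((2.0, 4.0), 1), ((4.0, 8.0), 3)]})
# returns {0: [3.5, 7.0]}, since (2*1 + 4*3) / 4 = 3.5 and (4*1 + 8*3) / 4 = 7.0.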


def predict_pose(class_name, image, fps_points):
    nnInput = np.array([image])
    # load the vector model, which predicts a unit vector per pixel towards each keypoint
    vecModel = models.stvNetNew(outVectors=True, outClasses=False)
    vecModel.load_weights(f'models/stvNet_new_coords_{class_name}')  # weights for the standard-labels model
    vecModel.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.Huber())
    # load the class model used for image segmentation
    classModel = models.uNet(outVectors=False, outClasses=True)
    classModel.load_weights(f'models/uNet_classes_{class_name}')
    classModel.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.BinaryCrossentropy())
    vecPred = vecModel.predict(nnInput)[0]
    classPred = classModel.predict(nnInput)[0]
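    # Assumed shapes (inferred from the indexing below): vecPred holds 18 channels per
    # pixel, an (x, y) unit vector for each of the 9 keypoints; classPred holds a
    # per-pixel foreground probability.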
# print("Vector Prediction shape: " + str(vecPred.shape))
# print("Class Prediction shape: " + str(classPred.shape))
# showImage(classPred) # let's see our class prediction output
# ====================
population = np.where(classPred > .9)[:2] # .9
population = list(zip(population[0], population[1]))
# print(len(population)) # the number of class pixels found
# ====================
hypDict = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: []}
    for n in range(50):  # sample two pixels and intersect their predicted unit vectors
        p1 = population.pop(random.randrange(len(population)))
        v1 = vecPred[p1[0]][p1[1]]
        p2 = population.pop(random.randrange(len(population)))
        v2 = vecPred[p2[0]][p2[1]]
        for i in range(9):  # use each keypoint's line intersection as a hypothesis
            if v1[i * 2] == 0 or v2[i * 2] == 0:  # guard added: vertical direction has an undefined slope
                continue
            m1 = v1[i * 2 + 1] / v1[i * 2]  # slope = y component / x component
            m2 = v2[i * 2 + 1] / v2[i * 2]
            if m1 == m2:  # parallel lines have no intersection
                continue
            b1 = p1[0] - p1[1] * m1  # intercept from y = m * x + b at pixel (y, x)
            b2 = p2[0] - p2[1] * m2
            x = (b2 - b1) / (m1 - m2)
            y = m1 * x + b1
            # reject intersections inconsistent with the predicted vector directions;
            # the explicit parentheses are required, since the original unparenthesized
            # comparisons were chained pairwise by Python rather than compared as booleans
            if ((y >= p1[0]) != (v1[i * 2 + 1] < 0) or (x >= p1[1]) != (v1[i * 2] < 0) or
                    (y >= p2[0]) != (v2[i * 2 + 1] < 0) or (x >= p2[1]) != (v2[i * 2] < 0)):
                continue
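            # Worked example (hypothetical numbers): p1 = (0, 0) with m1 = 1 gives the
            # line y = x; p2 = (0, 4) with m2 = -1 gives y = -x + 4; then
            # x = (4 - 0) / (1 - (-1)) = 2 and y = 2, the intersection of the two lines.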
            weight = 0
            for voter in population:  # remaining pixels vote on the fit of this hypothesis
                yDiff = y - voter[0]
                xDiff = x - voter[1]
                mag = math.sqrt(yDiff ** 2 + xDiff ** 2)
                vec = vecPred[voter[0]][voter[1]][i * 2: i * 2 + 2]
                # count the vote if the voter's vector points almost exactly at (y, x)
                if ransacVal(yDiff / mag, xDiff / mag, vec) > .99:
                    weight += 1
            hypDict[i].append(((y, x), weight))
        population.append(p1)  # return the sampled pixels to the population
        population.append(p2)
# print("--------------------")
# print("Coordinate hypotheses and weights: " + str(hypDict[0]))
# print("# Coordinate hypotheses and weights: " + str(len(hypDict[0])))
# ================
pruneHypsStdDev(hypDict)
# print("# Coordinate hypotheses and weights: " + str(len(hypDict[0])))
# ==========================
meanDict = getMean(hypDict)
# print(meanDict)
# =============================
    preds = dictToArray(meanDict)[:8]  # keep the first 8 keypoints to match fps_points
    matrix = np.array([[543.25272224, 0., 320.25],
                       [0., 724.33696299, 240.33333333],
                       [0., 0., 1.]])  # camera intrinsic matrix (GUIMOD)
    _, rVec, tVec = cv2.solvePnP(fps_points, preds, matrix,
                                 np.zeros(shape=[8, 1], dtype='float64'),  # zero distortion coefficients
                                 flags=cv2.SOLVEPNP_ITERATIVE)
    return rVec, tVec
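

# A minimal usage sketch (hypothetical paths; assumes an image sized to the models'
# input and an 8x3 fps_points array, matching the solvePnP call above):
#
#   img = cv2.imread('Generated_Worlds_/example.png')
#   fps = np.loadtxt('Generated_Worlds_/Generated/kiwi1/kiwi1_fps_3d.txt')
#   rVec, tVec = predict_pose('kiwi1', img, fps)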


if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument("-cls_name", "--class_name", type=str, default='kiwi1',
                    help="[kiwi1, pear2, banana1, orange, peach1]")
    args = vars(ap.parse_args())
    class_name = args["class_name"]
    basePath = os.path.dirname(os.path.realpath(__file__)) + '/Generated_Worlds_/Generated_Worlds_Evaluating/' + class_name
    fps = np.loadtxt(f'Generated_Worlds_/Generated/{class_name}/{class_name}_fps_3d.txt')
    images_ls, labels_ls, mask_ls, choice_ls = data.getAllValData(class_name)
    print(len(images_ls))
    for i, img in enumerate(images_ls):
        img_id = int(choice_ls[i].split('.png')[0])  # e.g. '42.png' -> 42
        r_pre, t_pre = predict_pose(class_name, img, fps)
        r = R.from_rotvec(r_pre.reshape(3,))  # rotation vector -> Rotation object
        r_pre_mx = np.array(r.as_matrix())
        res = np.zeros((3, 4))  # 3x4 pose matrix [R | t]
        res[:3, :3] = r_pre_mx
        res[:3, 3] = t_pre.reshape(3,)  # solvePnP returns (3, 1); flatten to fill the column
        np.save(f'{basePath}/Pose_prediction/{class_name}/{img_id}.npy', res)  # save the pose