I’m trying to get a depth map with an uncalibrated method.
I can obtain the fundamental matrix by finding corresponding points with SIFT and then using cv2.findFundamentalMat. I then use cv2.stereoRectifyUncalibrated to get the homography matrices for each image. Finally I use cv2.warpPerspective to rectify and compute the disparity, but this doesn't create a good depth map. The values are very high, so I'm wondering if I have to use warpPerspective or if I have to calculate a rotation matrix from the homography matrices I got with stereoRectifyUncalibrated.
I'm also not sure how to build the projective matrix in the case of the homography matrices obtained with stereoRectifyUncalibrated for rectification.
A part of the code:
#Obtaining the corresponding points with SIFT
sift = cv2.SIFT()
###find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(dst1,None)
kp2, des2 = sift.detectAndCompute(dst2,None)
###FLANN parameters
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params,search_params)
matches = flann.knnMatch(des1,des2,k=2)
good = []
pts1 = []
pts2 = []
###ratio test as per Lowe's paper
for i,(m,n) in enumerate(matches):
    if m.distance < 0.8*n.distance:
        good.append(m)
        pts2.append(kp2[m.trainIdx].pt)
        pts1.append(kp1[m.queryIdx].pt)
pts1 = np.array(pts1)
pts2 = np.array(pts2)
#Computation of the fundamental matrix
F,mask= cv2.findFundamentalMat(pts1,pts2,cv2.FM_LMEDS)
# Obtainment of the rectification matrix and use of the warpPerspective to transform them...
pts1 = pts1[:,:][mask.ravel()==1]
pts2 = pts2[:,:][mask.ravel()==1]
pts1 = np.int32(pts1)
pts2 = np.int32(pts2)
p1fNew = pts1.reshape((pts1.shape[0] * 2, 1))
p2fNew = pts2.reshape((pts2.shape[0] * 2, 1))
retBool ,rectmat1, rectmat2 = cv2.stereoRectifyUncalibrated(p1fNew,p2fNew,F,(2048,2048))
dst11 = cv2.warpPerspective(dst1,rectmat1,(2048,2048))
dst22 = cv2.warpPerspective(dst2,rectmat2,(2048,2048))
#calculation of the disparity
stereo = cv2.StereoBM(cv2.STEREO_BM_BASIC_PRESET,ndisparities=16*10, SADWindowSize=9)
disp = stereo.compute(dst22.astype(np.uint8), dst11.astype(np.uint8)).astype(np.float32)
plt.imshow(disp);plt.colorbar();plt.clim(0,400)#;plt.show()
plt.savefig("0gauche.png")
#plot depth using the disparity, the focal length C1[0,0] from stereo calibration, and T[0], the distance between the cameras
plt.imshow(C1[0,0]*T[0]/(disp),cmap='hot');plt.clim(-0,500);plt.colorbar();plt.show()
Here are the rectified pictures with the uncalibrated method (and warpPerspective):
Here are the rectified pictures with the calibrated method:
I don't know why the difference is so large between the two kinds of pictures. And for the calibrated method, it doesn't seem aligned.
The disparity map using the uncalibrated method:
The depths are calculated with : C1[0,0]*T[0]/(disp)
with T from the stereoCalibrate. The values are very high.
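(A sanity check on the depth formula: masking out non-positive disparities before dividing avoids the huge values produced by division by near-zero disparity. A sketch using the same disp, C1 and T as above:)

import numpy as np

focal = C1[0, 0]        # focal length in pixels, from the calibration
baseline = abs(T[0])    # distance between the cameras
valid = disp > 0        # unmatched StereoBM pixels come out non-positive here
depth = np.full(disp.shape, np.inf, dtype=np.float32)
depth[valid] = focal * baseline / disp[valid]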
———— EDIT LATER ————
I tried to “mount” the reconstruction matrix ([Devernay97], [Garcia01]) with the homography matrix obtained with “stereoRectifyUncalibrated”, but the result is still not good. Am I doing this correctly?
Y=np.arange(0,2048)
X=np.arange(0,2048)
(XX_field,YY_field)=np.meshgrid(X,Y)
#I mount the X, Y and disparity in a same 3D array
stock = np.concatenate((np.expand_dims(XX_field,2),np.expand_dims(YY_field,2)),axis=2)
XY_disp = np.concatenate((stock,np.expand_dims(disp,2)),axis=2)
XY_disp_reshape = XY_disp.reshape(XY_disp.shape[0]*XY_disp.shape[1],3)
Ts = np.hstack((np.zeros((3,3)),T_0)) #I use only the translations obtained with the rectified calibration... Is that correct?
# I establish the projective matrix with the homography matrix
P11 = np.dot(rectmat1,C1)
P1 = np.vstack((np.hstack((P11,np.zeros((3,1)))),np.zeros((1,4))))
P1[3,3] = 1
# P1 = np.dot(C1,np.hstack((np.identity(3),np.zeros((3,1)))))
P22 = np.dot(np.dot(rectmat2,C2),Ts)
P2 = np.vstack((P22,np.zeros((1,4))))
P2[3,3] = 1
lambda_t = cv2.norm(P1[0,:].T)/cv2.norm(P2[0,:].T)
#I define the reconstruction matrix
Q = np.zeros((4,4))
Q[0,:] = P1[0,:].T
Q[1,:] = P1[1,:].T
Q[2,:] = lambda_t*P2[1,:].T - P1[1,:].T
Q[3,:] = P1[2,:].T
#I do the calculation to get my 3D coordinates
test = []
for i in range(0,XY_disp_reshape.shape[0]):
    a = np.dot(inv(Q),np.expand_dims(np.concatenate((XY_disp_reshape[i,:],np.ones((1))),axis=0),axis=1))
    test.append(a)
test = np.asarray(test)
XYZ = test[:,:,0].reshape(XY_disp.shape[0],XY_disp.shape[1],4)
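(As an aside, the per-pixel loop above can also be written vectorized; a sketch using the same Q and XY_disp_reshape:)

import numpy as np
from numpy.linalg import inv

# append the homogeneous coordinate to every (x, y, disparity) row
homog = np.hstack((XY_disp_reshape, np.ones((XY_disp_reshape.shape[0], 1))))
# row-wise inv(Q) @ p, done in a single matrix product
XYZ_h = homog.dot(inv(Q).T)
XYZ = XYZ_h.reshape(XY_disp.shape[0], XY_disp.shape[1], 4)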
The large black areas of your calibrated rectified images would lead me to believe that, for those, calibration was not done very well. There's a variety of reasons that could be at play (maybe the physical setup, maybe lighting when you did calibration, etc.), but there are plenty of camera calibration tutorials out there for that. My understanding is that you are asking for a way to get a better depth map from an uncalibrated setup (this isn't 100% clear, but the title seems to support this, and I think that's what people will come here to try to find).
Your basic approach is correct, but the results can definitely be improved. This form of depth mapping is not among those that produce the highest quality maps (especially being uncalibrated). The biggest improvement will likely come from using a different stereo matching algorithm. The lighting may also be having a significant effect. The right image (at least to my naked eye) appears to be less well lit which could interfere with the reconstruction. You could first try brightening it to the same level as the other, or gather new images if that is possible. From here out, I’ll assume you have no access to the original cameras, so I’ll consider gathering new images, altering the setup, or performing calibration to be out of scope. (If you do have access to the setup and cameras, then I would suggest checking calibration and using a calibrated method as this will work better).
You used StereoBM for calculating your disparity (depth map) which does work, but StereoSGBM is much better suited for this application (it handles smoother edges better). You can see the difference below.
This article explains the differences in more depth:
Block matching focuses on high texture images (think a picture of a tree) and semi-global block matching will focus on sub pixel level matching and pictures with more smooth textures (think a picture of a hallway).
Without any explicit intrinsic camera parameters, specifics about the camera setup (like focal distance, distance between the cameras, distance to the subject, etc.), a known dimension in the image, or motion (to use structure from motion), you can only obtain 3D reconstruction up to a projective transform; you won't have a sense of scale or necessarily rotation either, but you can still generate a relative depth map. You will likely suffer from some barrel and other distortions which could be removed with proper camera calibration, but you can get reasonable results without it as long as the cameras aren't terrible (the lens system isn't too distorted) and are set up pretty close to canonical configuration (which basically means they are oriented such that their optical axes are as close to parallel as possible, and their fields of view overlap sufficiently). This doesn't however appear to be the OP's issue, as he did manage to get alright rectified images with the uncalibrated method.
Basic Procedure
Find at least 8 well-matched points in both images that you can use to calculate the Fundamental Matrix (findFundamentalMat needs at least 7, and this answer uses 8; you can use any detector and matcher you like, I kept FLANN but used ORB to do detection as SIFT isn't in the main version of OpenCV for 4.2.0)
Calculate the Fundamental Matrix, F, with findFundamentalMat
Undistort your images with stereoRectifyUncalibrated and warpPerspective
Calculate Disparity (Depth Map) with StereoSGBM
The results are much better:
Matches with ORB and FLANN
Undistorted images (left, then right)
Disparity
StereoBM
This result looks similar to the OP's problems (speckling, gaps, wrong depths in some areas).
StereoSGBM (tuned)
This result looks much better and uses roughly the same method as the OP, minus the final disparity calculation, making me think the OP would see similar improvements on his images, had they been provided.
Post filtering
There’s a good article about this in the OpenCV docs. I’d recommend looking at it if you need really smooth maps.
The example photos above are frame 1 from the scene ambush_2 in the MPI Sintel Dataset.
Full code (Tested on OpenCV 4.2.0):
import cv2
import numpy as np
import matplotlib.pyplot as plt
imgL = cv2.imread("tsukuba_l.png", cv2.IMREAD_GRAYSCALE) # left image
imgR = cv2.imread("tsukuba_r.png", cv2.IMREAD_GRAYSCALE) # right image
def get_keypoints_and_descriptors(imgL, imgR):
    """Use ORB detector and FLANN matcher to get keypoints, descriptors,
    and corresponding matches that will be good for computing
    homography.
    """
    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(imgL, None)
    kp2, des2 = orb.detectAndCompute(imgR, None)

    ############## Using FLANN matcher ##############
    # Each keypoint of the first image is matched with a number of
    # keypoints from the second image. k=2 means keep the 2 best matches
    # for each keypoint (best matches = the ones with the smallest
    # distance measurement).
    FLANN_INDEX_LSH = 6
    index_params = dict(
        algorithm=FLANN_INDEX_LSH,
        table_number=6,  # 12
        key_size=12,  # 20
        multi_probe_level=1,
    )  # 2
    search_params = dict(checks=50)  # or pass empty dictionary
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    flann_match_pairs = flann.knnMatch(des1, des2, k=2)
    return kp1, des1, kp2, des2, flann_match_pairs


def lowes_ratio_test(matches, ratio_threshold=0.6):
    """Filter matches using the Lowe's ratio test.

    The ratio test checks if matches are ambiguous and should be
    removed by checking that the two distances are sufficiently
    different. If they are not, then the match at that keypoint is
    ignored.

    https://stackoverflow.com/questions/51197091/how-does-the-lowes-ratio-test-work
    """
    filtered_matches = []
    for m, n in matches:
        if m.distance < ratio_threshold * n.distance:
            filtered_matches.append(m)
    return filtered_matches


def draw_matches(imgL, imgR, kp1, des1, kp2, des2, flann_match_pairs):
    """Draw the first 8 matches between the left and right images."""
    # https://docs.opencv.org/4.2.0/d4/d5d/group__features2d__draw.html
    # https://docs.opencv.org/2.4/modules/features2d/doc/common_interfaces_of_descriptor_matchers.html
    img = cv2.drawMatches(
        imgL,
        kp1,
        imgR,
        kp2,
        flann_match_pairs[:8],
        None,
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
    )
    cv2.imshow("Matches", img)
    cv2.imwrite("ORB_FLANN_Matches.png", img)
    cv2.waitKey(0)


def compute_fundamental_matrix(matches, kp1, kp2, method=cv2.FM_RANSAC):
    """Use the set of good matches to estimate the Fundamental Matrix.

    See https://en.wikipedia.org/wiki/Eight-point_algorithm#The_normalized_eight-point_algorithm
    for more info.
    """
    pts1, pts2 = [], []
    fundamental_matrix, inliers = None, None
    for m in matches[:8]:
        pts1.append(kp1[m.queryIdx].pt)
        pts2.append(kp2[m.trainIdx].pt)
    if pts1 and pts2:
        # You can play with the Threshold and confidence values here
        # until you get something that gives you reasonable results. I
        # used the defaults
        fundamental_matrix, inliers = cv2.findFundamentalMat(
            np.float32(pts1),
            np.float32(pts2),
            method=method,
            # ransacReprojThreshold=3,
            # confidence=0.99,
        )
    return fundamental_matrix, inliers, pts1, pts2
############## Find good keypoints to use ##############
kp1, des1, kp2, des2, flann_match_pairs = get_keypoints_and_descriptors(imgL, imgR)
good_matches = lowes_ratio_test(flann_match_pairs, 0.2)
draw_matches(imgL, imgR, kp1, des1, kp2, des2, good_matches)
############## Compute Fundamental Matrix ##############
F, I, points1, points2 = compute_fundamental_matrix(good_matches, kp1, kp2)
############## Stereo rectify uncalibrated ##############
h1, w1 = imgL.shape
h2, w2 = imgR.shape
thresh = 0
_, H1, H2 = cv2.stereoRectifyUncalibrated(
np.float32(points1), np.float32(points2), F, imgSize=(w1, h1), threshold=thresh,
)
############## Undistort (Rectify) ##############
imgL_undistorted = cv2.warpPerspective(imgL, H1, (w1, h1))
imgR_undistorted = cv2.warpPerspective(imgR, H2, (w2, h2))
cv2.imwrite("undistorted_L.png", imgL_undistorted)
cv2.imwrite("undistorted_R.png", imgR_undistorted)
############## Calculate Disparity (Depth Map) ##############
# Using StereoBM
stereo = cv2.StereoBM_create(numDisparities=16, blockSize=15)
disparity_BM = stereo.compute(imgL_undistorted, imgR_undistorted)
plt.imshow(disparity_BM, "gray")
plt.colorbar()
plt.show()
# Using StereoSGBM
# Set disparity parameters. Note: disparity range is tuned according to
# specific parameters obtained through trial and error.
win_size = 2
min_disp = -4
max_disp = 9
num_disp = max_disp - min_disp # Needs to be divisible by 16
stereo = cv2.StereoSGBM_create(
minDisparity=min_disp,
numDisparities=num_disp,
blockSize=5,
uniquenessRatio=5,
speckleWindowSize=5,
speckleRange=5,
disp12MaxDiff=2,
P1=8 * 3 * win_size ** 2,
P2=32 * 3 * win_size ** 2,
)
disparity_SGBM = stereo.compute(imgL_undistorted, imgR_undistorted)
plt.imshow(disparity_SGBM, "gray")
plt.colorbar()
plt.show()
There might be several possible issues resulting in a low-quality Depth Channel and Disparity Channel, which lead to a low-quality stereo sequence. Here are 6 of those issues:
Possible issue I
Incomplete Formula
As the word uncalibrated implies, the stereoRectifyUncalibrated instance method computes rectification transformations for you, in case you don't know or can't know the intrinsic parameters of your stereo pair and its relative position in the environment.
# pts1 –> an array of feature points in a first camera
# pts2 –> an array of feature points in a first camera
# fm –> input fundamental matrix
# imgSize -> size of an image
# rhm1 -> output rectification homography matrix for a first image
# rhm2 -> output rectification homography matrix for a second image
# thres –> optional threshold used to filter out outliers
So, you may not be taking into account three parameters: rhm1, rhm2 and thres. If the threshold > 0, all the point pairs that don't comply with the epipolar geometry are rejected prior to computing the homographies. Otherwise, all points are considered inliers. The check looks like this:

(pts2[i]^t * fm * pts1[i]) > thres

# ^t –> transpose of the point vector

Thus, I believe that visual inaccuracies might appear due to an incomplete formula calculation.
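For reference, a sketch of applying that filtering via the inlier mask that findFundamentalMat returns (assuming pts1 and pts2 are float32 arrays of matched points):

import numpy as np
import cv2

F, mask = cv2.findFundamentalMat(pts1, pts2, cv2.FM_RANSAC, 3, 0.99)
inliers1 = pts1[mask.ravel() == 1]   # keep only pairs that satisfy the epipolar constraint
inliers2 = pts2[mask.ravel() == 1]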
Possible issue II
Interaxial Distance
A robust interaxial distance between the left and right camera lenses must be not greater than 200 mm. When the interaxial distance is larger than the interocular distance, the effect is called hyperstereoscopy or hyperdivergence and results not only in depth exaggeration in the scene but also in the viewer's physical discomfort. Read Autodesk's Stereoscopic Filmmaking Whitepaper to find out more on this topic.
Possible issue III
Parallel vs Toed-In camera mode
Visual inaccuracies in the resulting Disparity Map may occur due to an incorrect Camera Mode calculation. Many stereographers prefer the Toe-In camera mode, but Pixar, for example, prefers the Parallel camera mode.
Possible issue IV
Vertical Alignment
In stereoscopy, if a vertical shift occurs (even if one of the views is shifted up by 1 mm), it ruins a robust stereo experience. So, before generating the Disparity Map you must be sure that the left and right views of your stereo pair are accordingly aligned. Look at the Technicolor Stereoscopic Whitepaper about the 15 common problems in stereo.
Stereo Rectification Matrix:
┌ ┐
| f 0 cx tx |
| 0 f cy ty | # use "ty" value to fix vertical shift in one image
| 0 0 1 0 |
└ ┘
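As an illustration, a sketch of fixing a known vertical shift with a translation-only affine warp (the image path and the ty value here are hypothetical):

import cv2
import numpy as np

right = cv2.imread("right_view.png", cv2.IMREAD_GRAYSCALE)  # hypothetical path
ty = 2  # hypothetical vertical offset, e.g. measured from matched keypoints
M = np.float32([[1, 0, 0],
                [0, 1, ty]])  # shift the view down by ty pixels
right_aligned = cv2.warpAffine(right, M, (right.shape[1], right.shape[0]))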
Possible issue V
Lens Distortion
Lens Distortion is a very important topic in stereo composition. Before generating a Disparity Map you need to undistort the left and right views, then generate the disparity channel, and then redistort both views again.
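A sketch of the undistortion step with cv2.undistort (the intrinsics and distortion coefficients here are hypothetical placeholders; in practice they come from calibration):

import cv2
import numpy as np

img = cv2.imread("left_view.png")  # hypothetical path
camera_matrix = np.array([[700.0, 0.0, 640.0],
                          [0.0, 700.0, 360.0],
                          [0.0, 0.0, 1.0]])        # hypothetical intrinsics
dist_coeffs = np.array([-0.2, 0.05, 0.0, 0.0, 0.0])  # hypothetical k1, k2, p1, p2, k3
undistorted = cv2.undistort(img, camera_matrix, dist_coeffs)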
Possible issue VI
Low-quality Depth channel without anti-aliasing
For creating a high-quality Disparity Map you need left and right Depth Channels that must be pre-generated. When you work in a 3D package you can render a high-quality Depth Channel (with crisp edges) with just one click. But generating a high-quality depth channel from a video sequence is not easy, because the stereo pair has to move in your environment to produce the initial data for a future depth-from-motion algorithm. If there's no motion in a frame, the depth channel will be extremely poor.
Also, the Depth channel itself has one more drawback: its edges do not match the edges of the RGB, because it has no anti-aliasing.
Disparity channel code snippet:
Here I'd like to present a quick approach to generating a Disparity Map:
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
imageLeft = cv.imread('paris_left.png', 0)
imageRight = cv.imread('paris_right.png', 0)
stereo = cv.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(imageLeft, imageRight)
plt.imshow(disparity, 'gray')
plt.show()
So I’ve followed this tutorial but it doesn’t seem to do anything. Simply nothing. It waits a few seconds and closes the program. What is wrong with this code?
import cv2
vidcap = cv2.VideoCapture('Compton.mp4')
success,image = vidcap.read()
count = 0
success = True
while success:
    success,image = vidcap.read()
    cv2.imwrite("frame%d.jpg" % count, image)     # save frame as JPEG file
    if cv2.waitKey(10) == 27:                     # exit if Escape is hit
        break
    count += 1
Also, in the comments it says that this limits the frames to 1000? Why?
EDIT:
I tried doing success = True first but that didn’t help. It only created one image that was 0 bytes.
From here, download this video so we have the same video file for the test. Make sure to have that mp4 file in the same directory as your Python code. Then also make sure to run the Python interpreter from the same directory.
Then modify the code: ditch waitKey, which is wasting time (and without a window it cannot capture the keyboard events anyway). We also print the success value to make sure it's reading the frames successfully.
import cv2
vidcap = cv2.VideoCapture('big_buck_bunny_720p_5mb.mp4')
success,image = vidcap.read()
count = 0
while success:
    cv2.imwrite("frame%d.jpg" % count, image)     # save frame as JPEG file
    success,image = vidcap.read()
    print('Read a new frame: ', success)
    count += 1
To extend on this question (and the answer by @user2700065) for a slightly different case: if anyone does not want to extract every frame, but wants to extract a frame every second. So a 1-minute video will give 60 frames (images).
import sys
import argparse
import cv2
print(cv2.__version__)
def extractImages(pathIn, pathOut):
    count = 0
    vidcap = cv2.VideoCapture(pathIn)
    success,image = vidcap.read()
    success = True
    while success:
        vidcap.set(cv2.CAP_PROP_POS_MSEC,(count*1000))    # added this line
        success,image = vidcap.read()
        print ('Read a new frame: ', success)
        cv2.imwrite( pathOut + "\\frame%d.jpg" % count, image)     # save frame as JPEG file
        count = count + 1

if __name__=="__main__":
    a = argparse.ArgumentParser()
    a.add_argument("--pathIn", help="path to video")
    a.add_argument("--pathOut", help="path to images")
    args = a.parse_args()
    print(args)
    extractImages(args.pathIn, args.pathOut)
This is a tweak of the previous answer for Python 3.x from @GShocked. I would post it as a comment, but I don't have enough reputation.
import sys
import argparse
import cv2
print(cv2.__version__)
def extractImages(pathIn, pathOut):
    vidcap = cv2.VideoCapture(pathIn)
    success,image = vidcap.read()
    count = 0
    success = True
    while success:
        success,image = vidcap.read()
        print ('Read a new frame: ', success)
        cv2.imwrite( pathOut + "\\frame%d.jpg" % count, image)     # save frame as JPEG file
        count += 1

if __name__=="__main__":
    print("aba")
    a = argparse.ArgumentParser()
    a.add_argument("--pathIn", help="path to video")
    a.add_argument("--pathOut", help="path to images")
    args = a.parse_args()
    print(args)
    extractImages(args.pathIn, args.pathOut)
This is a function which will convert most video formats into the number of frames there are in the video. It works on Python 3 with OpenCV 3+.
import cv2
import time
import os
def video_to_frames(input_loc, output_loc):
    """Function to extract frames from input video file
    and save them as separate frames in an output directory.
    Args:
        input_loc: Input video file.
        output_loc: Output directory to save the frames.
    Returns:
        None
    """
    try:
        os.mkdir(output_loc)
    except OSError:
        pass
    # Log the time
    time_start = time.time()
    # Start capturing the feed
    cap = cv2.VideoCapture(input_loc)
    # Find the number of frames
    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
    print ("Number of frames: ", video_length)
    count = 0
    print ("Converting video..\n")
    # Start converting the video
    while cap.isOpened():
        # Extract the frame
        ret, frame = cap.read()
        # Write the results back to output location.
        cv2.imwrite(output_loc + "/%#05d.jpg" % (count+1), frame)
        count = count + 1
        # If there are no more frames left
        if (count > (video_length-1)):
            # Log the time again
            time_end = time.time()
            # Release the feed
            cap.release()
            # Print stats
            print ("Done extracting frames.\n%d frames extracted" % count)
            print ("It took %d seconds for conversion." % (time_end-time_start))
            break

if __name__=="__main__":
    input_loc = '/path/to/video/00009.MTS'
    output_loc = '/path/to/output/frames/'
    video_to_frames(input_loc, output_loc)
It supports .mts and normal files like .mp4 and .avi. Tried and tested on .mts files; works like a charm.
After a lot of research on how to convert frames to video, I have created this function; hope this helps. It requires OpenCV:
import cv2
import numpy as np
import os
from os.path import isfile, join

def frames_to_video(inputpath, outputpath, fps):
    image_array = []
    files = [f for f in os.listdir(inputpath) if isfile(join(inputpath, f))]
    files.sort(key=lambda x: int(x[5:-4]))
    for i in range(len(files)):
        img = cv2.imread(inputpath + files[i])
        size = (img.shape[1], img.shape[0])
        img = cv2.resize(img, size)
        image_array.append(img)
    fourcc = cv2.VideoWriter_fourcc('D', 'I', 'V', 'X')
    out = cv2.VideoWriter(outputpath, fourcc, fps, size)
    for i in range(len(image_array)):
        out.write(image_array[i])
    out.release()

inputpath = 'folder path'
outpath = 'video file path/video.mp4'
fps = 29
frames_to_video(inputpath, outpath, fps)
Change the value of fps (frames per second), the input folder path, and the output folder path according to your own local locations.
The previous answers have lost the first frame. And it is nice to store the images in a folder.
# create a folder to store extracted images
import os
folder = 'test'
os.mkdir(folder)
# use opencv to do the job
import cv2
print(cv2.__version__) # my version is 3.1.0
vidcap = cv2.VideoCapture('test_video.mp4')
count = 0
while True:
    success,image = vidcap.read()
    if not success:
        break
    cv2.imwrite(os.path.join(folder,"frame{:d}.jpg".format(count)), image)     # save frame as JPEG file
    count += 1
print("{} images are extacted in {}.".format(count,folder))
By the way, you can check the frame rate with VLC: go to Window -> Media Information -> Codec Details.
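You can also query the frame rate programmatically (a sketch using the CAP_PROP_FPS property, with the same placeholder file name as above):

import cv2

vidcap = cv2.VideoCapture('test_video.mp4')
fps = vidcap.get(cv2.CAP_PROP_FPS)   # frames per second as reported by the container
print('Frames per second:', fps)
vidcap.release()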
This code extracts frames from the video and saves the frames in .jpg format.
import cv2
import numpy as np
import os
# set video file path of input video with name and extension
vid = cv2.VideoCapture('VideoPath')
if not os.path.exists('images'):
os.makedirs('images')
#for frame identity
index = 0
while(True):
    # Extract images
    ret, frame = vid.read()
    # end of frames
    if not ret:
        break
    # Saves images
    name = './images/frame' + str(index) + '.jpg'
    print ('Creating...' + name)
    cv2.imwrite(name, frame)
    # next frame
    index += 1
I am using Python via Anaconda's Spyder software. Using the original code listed in the question of this thread by @Gshocked, the code does not work (Python won't read the mp4 file). So I downloaded OpenCV 3.2 and copied "opencv_ffmpeg320.dll" and "opencv_ffmpeg320_64.dll" from the "bin" folder. I pasted both of these dll files into Anaconda's "Dlls" folder.
Anaconda also has a “pckgs” folder…I copied and pasted the entire “OpenCV 3.2” folder that I downloaded to the Anaconda “pckgs” folder.
Finally, Anaconda has a “Library” folder which has a “bin” subfolder. I pasted the “opencv_ffmpeg320.dll” and “opencv_ffmpeg320_64.dll” files to that folder.
After closing and restarting Spyder, the code worked. I’m not sure which of the three methods worked, and I’m too lazy to go back and figure it out. But it works so, cheers!
This function extracts images from a video at 1 fps; in addition, it identifies the last frame and stops reading:

import cv2
import numpy as np

def extract_image_one_fps(video_source_path):
    vidcap = cv2.VideoCapture(video_source_path)
    count = 0
    success = True
    while success:
        vidcap.set(cv2.CAP_PROP_POS_MSEC,(count*1000))
        success,image = vidcap.read()

        ## Stop when last frame is identified
        image_last = cv2.imread("frame{}.png".format(count-1))
        if np.array_equal(image,image_last):
            break

        cv2.imwrite("frame%d.png" % count, image)     # save frame as PNG file
        print '{}.sec reading a new frame: {} '.format(count,success)
        count += 1
The following script will extract frames every half a second from all videos in a folder. (Works on Python 3.7.)
import cv2
import os
listing = os.listdir(r'D:/Images/AllVideos')
count=1
for vid in listing:
    vid = r"D:/Images/AllVideos/"+vid
    vidcap = cv2.VideoCapture(vid)

    def getFrame(sec):
        vidcap.set(cv2.CAP_PROP_POS_MSEC,sec*1000)
        hasFrames,image = vidcap.read()
        if hasFrames:
            cv2.imwrite("D:/Images/Frames/image"+str(count)+".jpg", image) # Save frame as JPG file
        return hasFrames

    sec = 0
    frameRate = 0.5 # Change this number to 1 for each 1 second
    success = getFrame(sec)
    while success:
        count = count + 1
        sec = sec + frameRate
        sec = round(sec, 2)
        success = getFrame(sec)
The ‘Image.fromarray’ is a little ugly because I clip incoming data to [0,255], convert to bytes, then create a grayscale image. I mostly work in gray.
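A minimal sketch of that conversion, assuming a 2-D float array:

import numpy as np
from PIL import Image

def to_gray_image(arr):
    # clip to [0, 255], convert to bytes, then build a grayscale ("L") image
    clipped = np.clip(arr, 0, 255).astype(np.uint8)
    return Image.fromarray(clipped, mode="L")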
OpenCV image format supports the numpy array interface. A helper function can be made to support either grayscale or color images. This means the BGR -> RGB conversion can be conveniently done with a numpy slice, not a full copy of image data.
Note: this is a stride trick, so modifying the output array will also change the OpenCV image data. If you want a copy, use .copy() method on the array!
import numpy as np
def img_as_array(im):
    """OpenCV's native format to a numpy array view"""
    w, h, n = im.width, im.height, im.channels
    modes = {1: "L", 3: "RGB", 4: "RGBA"}
    if n not in modes:
        raise Exception('unsupported number of channels: {0}'.format(n))
    out = np.asarray(im)
    if n != 1:
        out = out[:, :, ::-1]  # BGR -> RGB conversion
    return out
I also adopted imageio, but I found the following machinery useful for pre- and post-processing:
import imageio
import numpy as np
def imload(*a, **k):
    i = imageio.imread(*a, **k)
    i = i.transpose((1, 0, 2))  # x and y are mixed up for some reason...
    i = np.flip(i, 1)  # make coordinate system right-handed!!!!!!
    return i/255

def imsave(i, url, *a, **k):
    # Original order of arguments was counterintuitive. It should
    # read verbally "Save the image to the URL" — not "Save to the
    # URL the image."
    i = np.flip(i, 1)
    i = i.transpose((1, 0, 2))
    i *= 255
    i = i.round()
    i = np.maximum(i, 0)
    i = np.minimum(i, 255)
    i = np.asarray(i, dtype=np.uint8)
    imageio.imwrite(url, i, *a, **k)
The rationale is that I am using numpy for image processing, not just image displaying. For this purpose, uint8s are awkward, so I convert to floating point values ranging from 0 to 1.
When saving images, I noticed I had to cut the out-of-range values myself, or else I ended up with a really gray output. (The gray output was the result of imageio compressing the full range, which was outside of [0, 256), to values that were inside the range.)
There were a couple other oddities, too, which I mentioned in the comments.
You can easily get a numpy array of an RGB image by using numpy and Image from PIL:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
im = Image.open('*image_name*') #These two lines
im_arr = np.array(im) #are all you need
plt.imshow(im_arr) #Just to verify that image array has been constructed properly
Load the image using the following syntax:
from keras.preprocessing import image

X_test = image.load_img('four.png', target_size=(28, 28), color_mode="grayscale")  # load the image, convert it to grayscale, and resize it to the target size
X_test = image.img_to_array(X_test)  # convert the image into an array
But that line throws the error shown in the title of this question.
I have OpenCV installed in C:\lib\opencv on this 64-bit machine. I’m using 64-bit Python.
My PYTHONPATH variable: PYTHONPATH=C:\lib\opencv\build\python\2.7. This folder contains cv2.pyd and that’s all.
My PATH variable: Path=%OPENCV_DIR%\bin;... This folder contains 39 DLL files such as opencv_core246d.dll.
OPENCV_DIR has this value: OPENCV_DIR=C:\lib\opencv\build\x64\vc11.
The solution at ImportError: DLL load failed: %1 is not a valid Win32 application says to add “the new opencv binaries path (C:\opencv\build\bin\Release) to the Windows PATH environment variable”. But as shown above, I already have the OpenCV binaries folder (C:\lib\opencv\build\x64\vc11\bin) in my PATH. And my OpenCV installation doesn’t have any Release folders (except for an empty one under build/java).
Any ideas as to what’s going wrong? Can I tell Python to verbosely trace the loading process? Exactly what DLL’s is it looking for?
Thanks,
Lars
EDIT:
I just noticed that, according to http://www.dependencywalker.com/, the cv2.pyd in C:\lib\opencv\build\python\2.7 is 32-bit, whereas the machine and the Python I’m running are 64-bit. Could that be the problem? And if so, where can I find a 64-bit version of cv2.pyd?
Please check if the Python version you are using is also 64-bit. If not, then that could be the issue: you would be using a 32-bit Python version and would have installed 64-bit binaries for the OpenCV library.
Wow, I found yet another case for this problem. None of the above worked. Eventually I used python’s ability to introspect what was being loaded. For python 2.7 this means:
import imp
imp.find_module("cv2")
This turned up a completely unexpected "cv2.pyd" file in an Anaconda DLL directory that wasn't touched by multiple uninstall/install attempts. Python was looking there first and not finding my good installation. I deleted that cv2.pyd file, tried imp.find_module("cv2") again, and Python immediately found the right file and cv2 started working.
So if none of the other solutions work for you, make sure you use python introspection to see what file python is trying to load.
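On Python 3, where the imp module is deprecated, the same introspection can be done with importlib (a sketch):

import importlib.util

spec = importlib.util.find_spec("cv2")
print(spec.origin if spec else "cv2 not found")  # path of the file Python would load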
In my case, I have 64bit python, and it was lxml that was the wrong version–I should have been using the x64 version of that as well. I solved this by downloading the 64-bit version of lxml here:
If your build-system (CMake in my case) copies the file from <name>.dll to <name>.pyd, you will get this error if the original file wasn’t actually a dll. In my case, building shared libraries got switched off, so the underlying file was actually a *.lib.
I discovered this error by loading the pyd file in DependencyWalker and finding that it wasn’t valid.
I uninstalled the pywin32 module. To uninstall execute the following command in Command Prompt.
pip uninstall pywin32
Then, I reinstalled pywin32. To install it, open the Command Prompt in the same directory where the pywin32 wheel file lies. Then execute the following command.
pip install <Name of the wheel file with extension>
The wheel file will be like: pywin32-XXX-cpXX-none-win32.whl
It solved the problem for me. You may also like to give it a try. Hope it works for you as well.
I copied the cv2.pyd file from the /opencv/build/python/2.7/x86 folder instead of from the /x64 folder to C:/Python27/Lib/site-packages. I followed the rest of the instructions provided here.
Added by someone else, not verified: I also copy file cv2.pyd to folder C:/Python27/Lib/site-packages/cv2. It works.
For me the problem was that I was using different versions of Python in the same Eclipse project. My setup was not consistent with the Project Properties and the Run Configuration Python versions.
In Project > Properties > PyDev, I had the Interpreter set to Python2.7.11.
In Run Configurations > Interpreter, I was using the Default Interpreter. Changing it to Python 2.7.11 fixed the problem.
I faced the same issue when I uninstalled and reinstalled a different version of 2.7.x of Python on my system using a 32 bit Windows Installer. I got the same error on most of my import statements.
I uninstalled the newly installed Python and downloaded a 64 bit Windows installer and reinstalled Python again and it worked.
Hope this helps you.
So I had problems installing vtk under Windows (as I use Python 3.7, there is no binary available so far, just for older Python versions, so pip install vtk is not working).
I typed python in my cmd:
Python 3.7.3 on win32
So I now know I have Python 3.7.3 running on 32-bit.
Please make sure that you have installed Python 2.7.12 or a lower version, otherwise you will definitely get this error.
Make sure the Oracle client is 64-bit if the OS is 64-bit.
Make sure Microsoft Visual C++ Compiler for Python 2.7 is 64-bit for a 64-bit OS, or 32-bit for a 32-bit OS.
Note: if your OS is 64-bit, install all 64-bit packages; if your OS is 32-bit, install all 32-bit packages.
I’m using opencv 2.4.2, python 2.7
The following simple code created a window of the correct name, but its content is just blank and doesn’t show the image:
I faced the same issue. I tried to read an image from IDLE and tried to display it using cv2.imshow(), but the display window freezes and shows pythonw.exe is not responding when trying to close the window.
The post below gives a possible explanation for why this is happening
"Basically, don't do this from IDLE. Write a script and run it from the shell, or run the script directly if in Windows, by naming it with a .pyw extension and double-clicking it. There is apparently a conflict between IDLE's own event loop and the ones from GUI toolkits."
When I used imshow() in a script and executed it, rather than running it directly over IDLE, it worked.
If you choose to use “cv2.waitKey(0)”, be sure that you have written “cv2.waitKey(0)” instead of “cv2.waitkey(0)”, because that lowercase “k” might freeze your program too.
I also had a -215 error. I thought imshow was the issue, but when I changed imread to read in a non-existent file I got no error there. So I put the image file in the working folder and added cv2.waitKey(0) and it worked.
How do I get the size of an image in the cv2 wrapper in Python OpenCV (numpy)? Is there a correct way to do that other than numpy.shape()? How can I get the dimensions in (width, height) format?
cv2 uses numpy for manipulating images, so the proper and best way to get the size of an image is using numpy.shape. Assuming you are working with BGR images, here is an example:
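For instance, a minimal sketch with a placeholder image path:

import cv2

img = cv2.imread('foo.jpg')            # placeholder path to a BGR image
height, width, channels = img.shape    # numpy shape is (height, width, channels)
print(width, height)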
I'm afraid there is no "better" way to get this size; however, it's not that much of a pain.
Of course your code should be safe for both binary/mono images as well as multi-channel ones, but the principal dimensions of the image always come first in the numpy array’s shape. If you opt for readability, or don’t want to bother typing this, you can wrap it up in a function, and give it a name you like, e.g. cv_size:
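A sketch of such a helper, using the reverse slice discussed in the Edit below:

def cv_size(img):
    # numpy shape is (height, width[, depth]); cv2 APIs expect (width, height)
    return tuple(img.shape[1::-1])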
Writing functions with def is not fun while working interactively.
Edit
Originally I thought that using [:2] was OK, but the numpy shape is (height, width[, depth]), and we need (width, height), as e.g. cv2.resize expects, so – we must use [1::-1]. Even less memorable than [:2]. And who remembers reverse slicing anyway?
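A lambda version of the same helper for interactive use:

cv_size = lambda img: tuple(img.shape[1::-1])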
When trying to import OpenCV, using import cv2 I get the following error:
/usr/local/lib/python2.7/dist-packages/cv2/__init__.py in <module>()
7
8 # make IDE's (PyCharm) autocompletion happy
----> 9 from .cv2 import *
10
11 # wildcard import above does not import "private" variables like __version__
ImportError: libSM.so.6: cannot open shared object file: No such file or directory
There is now a headless version of opencv-python which removes the graphical dependencies (like libSM). You can see the normal / headless version on the releases page (and the GitHub issue leading to this); just add -headless when installing, e.g.,
pip install opencv-python-headless
# also contrib, if needed
pip install opencv-contrib-python-headless
Maybe the problem is with your opencv-python version. It's better to downgrade your version to 3.3.0.9, which does not include any GUI dependencies. The same question was found on GitHub; here is the link to the answer.
But I think the image is not getting converted to CV format. The window shows me a large brown image.
Where am I going wrong in converting the image from PIL to CV format?
Also, why do I need to type cv.cv to access functions?
I want to use OpenCV 2.0 and Python 2.6 to show resized images. I used and adapted this example, but unfortunately that code is for OpenCV 2.1 and does not seem to be working on 2.0. Here is my code:
import os, glob
import cv
ulpath = "exampleshq/"
for infile in glob.glob( os.path.join(ulpath, "*.jpg") ):
    im = cv.LoadImage(infile)
    thumbnail = cv.CreateMat(im.rows/10, im.cols/10, cv.CV_8UC3)
    cv.Resize(im, thumbnail)
    cv.NamedWindow(infile)
    cv.ShowImage(infile, thumbnail)
    cv.WaitKey(0)
    cv.DestroyWindow(name)
Since I cannot use cv.LoadImageM, I used cv.LoadImage instead, which was no problem in other applications. Nevertheless, cv.iplimage has no attribute rows, cols, or size. Can anyone give me a hint how to solve this problem?
dst = cv2.resize(src, None, fx = 2, fy = 2, interpolation = cv2.INTER_CUBIC),
where fx is the scaling factor along the horizontal axis and fy along the vertical axis.
To shrink an image, it will generally look best with INTER_AREA interpolation, whereas to enlarge an image, it will generally look best with INTER_CUBIC (slow) or INTER_LINEAR (faster but still looks OK).
Example: shrink an image to fit a max height/width (keeping the aspect ratio)
import cv2
img = cv2.imread('YOUR_PATH_TO_IMG')
height, width = img.shape[:2]
max_height = 300
max_width = 300
# only shrink if img is bigger than required
if max_height < height or max_width < width:
    # get scaling factor
    scaling_factor = max_height / float(height)
    if max_width/float(width) < scaling_factor:
        scaling_factor = max_width / float(width)
    # resize image
    img = cv2.resize(img, None, fx=scaling_factor, fy=scaling_factor, interpolation=cv2.INTER_AREA)
cv2.imshow("Shrinked image", img)
key = cv2.waitKey()
You could use the GetSize function to get that information:
cv.GetSize(im)
would return a tuple with the width and height of the image.
You can also use im.depth and im.nChannels to get some more information.
And to resize an image, I would use a slightly different process, with another image instead of a matrix. It is better to try to work with the same type of data:
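A sketch of that process with the old cv API (the path is a placeholder; integer division mirrors the Python 2-era code above):

import cv

im = cv.LoadImage("example.jpg")   # placeholder path
w, h = cv.GetSize(im)
thumbnail = cv.CreateImage((w / 10, h / 10), im.depth, im.nChannels)
cv.Resize(im, thumbnail)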
Here’s a function to upscale or downscale an image by desired width or height while maintaining aspect ratio
# Resizes an image and maintains aspect ratio
def maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # Grab the image size and initialize dimensions
    dim = None
    (h, w) = image.shape[:2]

    # Return original image if no need to resize
    if width is None and height is None:
        return image

    # We are resizing height if width is none
    if width is None:
        # Calculate the ratio of the height and construct the dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # We are resizing width if height is none
    else:
        # Calculate the ratio of the width and construct the dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # Return the resized image
    return cv2.resize(image, dim, interpolation=inter)
I was doing a fun project: Solving a Sudoku from an input image using OpenCV (as in Google goggles etc). And I have completed the task, but at the end I found a little problem for which I came here.
I did the programming using Python API of OpenCV 2.3.1.
Below is what I did :
Read the image
Find the contours
Select the one with the maximum area (and also somewhat equivalent to a square).
Find the corner points.
e.g. given below:
(Notice here that the green line correctly coincides with the true boundary of the Sudoku, so the Sudoku can be correctly warped. Check next image)
Performing the step 4 on this image gives the result below:
The red line drawn is the original contour which is the true outline of sudoku boundary.
The green line drawn is approximated contour which will be the outline of warped image.
Of course, there is a difference between the green line and the red line at the top edge of the Sudoku. So while warping, I am not getting the original boundary of the Sudoku.
My Question :
How can I warp the image on the correct boundary of the Sudoku, i.e. the red line OR how can I remove the difference between red line and green line? Is there any method for this in OpenCV?
I have a solution that works, but you'll have to translate it to OpenCV yourself. It's written in Mathematica.
The first step is to adjust the brightness in the image, by dividing each pixel with the result of a closing operation:

src = ColorConvert[Import["http://davemark.com/images/sudoku.jpg"], "Grayscale"];
white = Closing[src, DiskMatrix[5]];
srcAdjusted = Image[ImageData[src] / ImageData[white]]
The next step is to find the sudoku area, so I can ignore (mask out) the background. For that, I use connected component analysis, and select the component that’s got the largest convex area:
I use connected component analysis again to extract the grid lines from these images. The grid lines are much longer than the digits, so I can use caliper length to select only the grid lines-connected components. Sorting them by position, I get 2×10 mask images for each of the vertical/horizontal grid lines in the image:
Next I take each pair of vertical/horizontal grid lines, dilate them, calculate the pixel-by-pixel intersection, and calculate the center of the result. These points are the grid line intersections:
All of the operations are basic image processing function, so this should be possible in OpenCV, too. The spline-based image transformation might be harder, but I don’t think you really need it. Probably using the perspective transformation you use now on each individual cell will give good enough results.
Nikie's answer solved my problem, but his answer was in Mathematica. So I thought I should give its OpenCV adaptation here. But after implementing it I could see that the OpenCV code is much bigger than nikie's Mathematica code. And also, I couldn't find the interpolation method used by nikie in OpenCV (although it can be done using SciPy; I will mention it when the time comes).
1. Image PreProcessing ( closing operation )
import cv2
import numpy as np
img = cv2.imread('dave.jpg')
img = cv2.GaussianBlur(img,(5,5),0)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
mask = np.zeros((gray.shape),np.uint8)
kernel1 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(11,11))
close = cv2.morphologyEx(gray,cv2.MORPH_CLOSE,kernel1)
div = np.float32(gray)/(close)
res = np.uint8(cv2.normalize(div,div,0,255,cv2.NORM_MINMAX))
res2 = cv2.cvtColor(res,cv2.COLOR_GRAY2BGR)
Result :
2. Finding Sudoku Square and Creating Mask Image
thresh = cv2.adaptiveThreshold(res,255,0,1,19,2)
contour,hier = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
max_area = 0
best_cnt = None
for cnt in contour:
    area = cv2.contourArea(cnt)
    if area > 1000:
        if area > max_area:
            max_area = area
            best_cnt = cnt

cv2.drawContours(mask,[best_cnt],0,255,-1)
cv2.drawContours(mask,[best_cnt],0,0,2)
res = cv2.bitwise_and(res,mask)
Result :
3. Finding Vertical lines
kernelx = cv2.getStructuringElement(cv2.MORPH_RECT,(2,10))
dx = cv2.Sobel(res,cv2.CV_16S,1,0)
dx = cv2.convertScaleAbs(dx)
cv2.normalize(dx,dx,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dx,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernelx,iterations = 1)
contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if h/w > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)
close = cv2.morphologyEx(close,cv2.MORPH_CLOSE,None,iterations = 2)
closex = close.copy()
Result :
4. Finding Horizontal Lines
kernely = cv2.getStructuringElement(cv2.MORPH_RECT,(10,2))
dy = cv2.Sobel(res,cv2.CV_16S,0,2)
dy = cv2.convertScaleAbs(dy)
cv2.normalize(dy,dy,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dy,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernely)
contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if w/h > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,None,iterations = 2)
closey = close.copy()
Result :
Of course, this one is not so good.
5. Finding Grid Points
res = cv2.bitwise_and(closex,closey)
Result :
6. Correcting the defects
Here, nikie does some kind of interpolation, about which I don’t have much knowledge. And I couldn’t find any corresponding function for this in OpenCV (maybe it is there, I don’t know).
Check out this SO question, which explains how to do this using SciPy (which I didn’t want to use): Image transformation in OpenCV
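For reference, here is a minimal sketch (mine, not part of the original answer) of what that SciPy-based correction could look like: interpolate a dense source-coordinate map from the 10×10 grid points bm built below, then resample with cv2.remap. The function name and the cell parameter are illustrative.

import cv2
import numpy as np
from scipy.interpolate import griddata

def dewarp_with_griddata(image, bm, cell=50):
    # Ideal positions of the 10x10 grid points in the output image.
    gy, gx = np.mgrid[0:10, 0:10]
    ideal = np.stack([gx.ravel()*cell, gy.ravel()*cell], axis=1).astype(np.float64)
    detected = bm.reshape(-1, 2).astype(np.float64)
    # For every output pixel, interpolate where it should be sampled from
    # in the distorted source image (cubic, i.e. spline-like, interpolation).
    out = 9*cell
    yy, xx = np.mgrid[0:out, 0:out]
    pix = np.stack([xx.ravel(), yy.ravel()], axis=1)
    map_x = griddata(ideal, detected[:,0], pix, method='cubic').reshape(out, out)
    map_y = griddata(ideal, detected[:,1], pix, method='cubic').reshape(out, out)
    return cv2.remap(image, map_x.astype(np.float32), map_y.astype(np.float32), cv2.INTER_LINEAR)

Usage would then be output = dewarp_with_griddata(res2, bm), which would replace the cell-by-cell warping below.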
So, here I took the 4 corners of each sub-square and applied warpPerspective to each.
For that, first we find the centroids.
contour, hier = cv2.findContours(res,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
centroids = []
for cnt in contour:
    mom = cv2.moments(cnt)
    (x,y) = int(mom['m10']/mom['m00']), int(mom['m01']/mom['m00'])
    cv2.circle(img,(x,y),4,(0,255,0),-1)
    centroids.append((x,y))
But the resulting centroids won’t be sorted. Check out the image below to see their order:
So we sort them from left to right, top to bottom.
centroids = np.array(centroids,dtype = np.float32)
c = centroids.reshape((100,2))
c2 = c[np.argsort(c[:,1])]
b = np.vstack([c2[i*10:(i+1)*10][np.argsort(c2[i*10:(i+1)*10,0])] for i in xrange(10)])
bm = b.reshape((10,10,2))
Now see their order below:
Finally we apply the transformation and create a new image of size 450×450.
output = np.zeros((450,450,3),np.uint8)
for i,j in enumerate(b):
    ri = i/10
    ci = i%10
    if ci != 9 and ri != 9:
        src = bm[ri:ri+2, ci:ci+2, :].reshape((4,2))
        dst = np.array([[ci*50,ri*50],[(ci+1)*50-1,ri*50],[ci*50,(ri+1)*50-1],[(ci+1)*50-1,(ri+1)*50-1]], np.float32)
        retval = cv2.getPerspectiveTransform(src,dst)
        warp = cv2.warpPerspective(res2,retval,(450,450))
        output[ri*50:(ri+1)*50-1, ci*50:(ci+1)*50-1] = warp[ri*50:(ri+1)*50-1, ci*50:(ci+1)*50-1].copy()
Result :
The result is almost the same as nikie’s, but the code is much longer. Maybe better methods are available out there, but until then, this works OK.
You could try to use some kind of grid-based modeling of your arbitrary warping. And since the sudoku already is a grid, that shouldn’t be too hard.
So you could try to detect the boundaries of each 3×3 subregion and then warp each region individually. If the detection succeeds, it would give you a better approximation; a sketch of the idea follows.
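A hypothetical sketch of that idea, reusing the 10×10 grid points bm from ARK’s answer above (variable names as in that answer): warp each 3×3 block (150×150 px in the output) from its four outer corners instead of going cell by cell.

output = np.zeros((450,450,3), np.uint8)
for br in range(3):      # block row
    for bc in range(3):  # block column
        # The four outer corners of this 3x3 subregion in the detected grid.
        src = np.float32([bm[br*3, bc*3],   bm[br*3, bc*3+3],
                          bm[br*3+3, bc*3], bm[br*3+3, bc*3+3]])
        dst = np.float32([[bc*150, br*150],       [(bc+1)*150-1, br*150],
                          [bc*150, (br+1)*150-1], [(bc+1)*150-1, (br+1)*150-1]])
        M = cv2.getPerspectiveTransform(src, dst)
        warp = cv2.warpPerspective(res2, M, (450,450))
        output[br*150:(br+1)*150, bc*150:(bc+1)*150] = warp[br*150:(br+1)*150, bc*150:(bc+1)*150]

Whether this beats the per-cell version depends on how straight the lines are inside each block.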
I want to add that the above method works only when the sudoku board stands straight; otherwise the height/width (or vice versa) ratio test will most probably fail, and you will not be able to detect the edges of the sudoku. (I also want to add that if the lines are not perpendicular to the image borders, the Sobel operations (dx and dy) will still work, as the lines will still have edges with respect to both axes.)
To be able to detect straight lines you should work on contour- or pixel-wise analysis, such as contourArea/boundingRect area, or the top-left and bottom-right points…
Edit: I managed to check whether a set of contours forms a line or not by applying linear regression and checking the error. However, linear regression performs poorly when the slope of the line is too big (i.e. > 1000) or very close to 0. Therefore, applying the ratio test above (in the most upvoted answer) before linear regression is logical, and it worked for me.
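A minimal sketch of that regression check (my own reconstruction, with an illustrative threshold): fit y = a·x + b to a contour’s points with np.polyfit and accept the contour as a line when the residual error is small. Swapping the axes for tall contours sidesteps the huge-slope failure mentioned above.

import numpy as np

def looks_like_line(cnt, max_rms=2.0):
    pts = cnt.reshape(-1, 2).astype(np.float64)
    x, y = pts[:,0], pts[:,1]
    # Regress along the longer axis so near-vertical contours don't blow up the slope.
    if np.ptp(x) < np.ptp(y):
        x, y = y, x
    if np.ptp(x) == 0:
        return True  # degenerate: all points share one coordinate
    a, b = np.polyfit(x, y, 1)
    rms = np.sqrt(np.mean((a*x + b - y)**2))
    return rms < max_rms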
Answer 4
To deal with the corners that were not detected, I applied gamma correction with a gamma value of 0.8.
The red circle is drawn to show the missing corner.
The code is:
gamma = 0.8
invGamma = 1/gamma
table = np.array([((i/255.0)**invGamma)*255 for i in np.arange(0,256)]).astype("uint8")
img = cv2.LUT(img, table)
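A side note (mine, not from the answer): with gamma = 0.8 the LUT exponent is 1/0.8 = 1.25, so mid-tones are darkened, which tends to strengthen faint grid lines against the light background and makes the missing intersections easier to pick up.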
I thought this was a great post, and a great solution by ARK; very well laid out and explained.
I was working on a similar problem, and built the entire thing. There were some changes (e.g., xrange to range, the return values of cv2.findContours), but this should work out of the box (Python 3.5, Anaconda).
This is a compilation of the elements above, with some of the missing code added (i.e., labeling of points).
'''
https://stackoverflow.com/questions/10196198/how-to-remove-convexity-defects-in-a-sudoku-square
'''
import cv2
import numpy as np
img = cv2.imread('test.png')
winname="raw image"
cv2.namedWindow(winname)
cv2.imshow(winname, img)
cv2.moveWindow(winname, 100,100)
img = cv2.GaussianBlur(img,(5,5),0)
winname="blurred"
cv2.namedWindow(winname)
cv2.imshow(winname, img)
cv2.moveWindow(winname, 100,150)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
mask = np.zeros((gray.shape),np.uint8)
kernel1 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(11,11))
winname="gray"
cv2.namedWindow(winname)
cv2.imshow(winname, gray)
cv2.moveWindow(winname, 100,200)
close = cv2.morphologyEx(gray,cv2.MORPH_CLOSE,kernel1)
div = np.float32(gray)/(close)
res = np.uint8(cv2.normalize(div,div,0,255,cv2.NORM_MINMAX))
res2 = cv2.cvtColor(res,cv2.COLOR_GRAY2BGR)
winname="res2"
cv2.namedWindow(winname)
cv2.imshow(winname, res2)
cv2.moveWindow(winname, 100,250)
#find elements
thresh = cv2.adaptiveThreshold(res,255,0,1,19,2)
img_c, contour,hier = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
max_area = 0
best_cnt = None
for cnt in contour:
    area = cv2.contourArea(cnt)
    if area > 1000:
        if area > max_area:
            max_area = area
            best_cnt = cnt
cv2.drawContours(mask,[best_cnt],0,255,-1)
cv2.drawContours(mask,[best_cnt],0,0,2)
res = cv2.bitwise_and(res,mask)
winname="puzzle only"
cv2.namedWindow(winname)
cv2.imshow(winname, res)
cv2.moveWindow(winname, 100,300)
# vertical lines
kernelx = cv2.getStructuringElement(cv2.MORPH_RECT,(2,10))
dx = cv2.Sobel(res,cv2.CV_16S,1,0)
dx = cv2.convertScaleAbs(dx)
cv2.normalize(dx,dx,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dx,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernelx,iterations = 1)
img_d, contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if h/w > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)
close = cv2.morphologyEx(close,cv2.MORPH_CLOSE,None,iterations = 2)
closex = close.copy()
winname="vertical lines"
cv2.namedWindow(winname)
cv2.imshow(winname, img_d)
cv2.moveWindow(winname, 100,350)
# find horizontal lines
kernely = cv2.getStructuringElement(cv2.MORPH_RECT,(10,2))
dy = cv2.Sobel(res,cv2.CV_16S,0,2)
dy = cv2.convertScaleAbs(dy)
cv2.normalize(dy,dy,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dy,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernely)
img_e, contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if w/h > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,None,iterations = 2)
closey = close.copy()
winname="horizontal lines"
cv2.namedWindow(winname)
cv2.imshow(winname, img_e)
cv2.moveWindow(winname, 100,400)
# intersection of these two gives dots
res = cv2.bitwise_and(closex,closey)
winname="intersections"
cv2.namedWindow(winname)
cv2.imshow(winname, res)
cv2.moveWindow(winname, 100,450)
# text green (BGR)
textcolor=(0,255,0)
# points blue (BGR)
pointcolor=(255,0,0)
# find centroids and sort
img_f, contour, hier = cv2.findContours(res,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
centroids = []
for cnt in contour:
    mom = cv2.moments(cnt)
    (x,y) = int(mom['m10']/mom['m00']), int(mom['m01']/mom['m00'])
    cv2.circle(img,(x,y),4,(0,255,0),-1)
    centroids.append((x,y))
# sorting
centroids = np.array(centroids,dtype = np.float32)
c = centroids.reshape((100,2))
c2 = c[np.argsort(c[:,1])]
b = np.vstack([c2[i*10:(i+1)*10][np.argsort(c2[i*10:(i+1)*10,0])] for i in range(10)])
bm = b.reshape((10,10,2))
# make copy
labeled_in_order=res2.copy()
for index, pt in enumerate(b):
    # putText/circle need integer pixel coordinates; b holds float32 points
    cv2.putText(labeled_in_order, str(index), tuple(int(v) for v in pt), cv2.FONT_HERSHEY_DUPLEX, 0.75, textcolor)
    cv2.circle(labeled_in_order, tuple(int(v) for v in pt), 5, pointcolor)
winname="labeled in order"
cv2.namedWindow(winname)
cv2.imshow(winname, labeled_in_order)
cv2.moveWindow(winname, 100,500)
# create final
output = np.zeros((450,450,3),np.uint8)
for i,j in enumerate(b):
    ri = int(i/10) # row index
    ci = i%10      # column index
    if ci != 9 and ri != 9:
        src = bm[ri:ri+2, ci:ci+2, :].reshape((4,2))
        dst = np.array([[ci*50,ri*50],[(ci+1)*50-1,ri*50],[ci*50,(ri+1)*50-1],[(ci+1)*50-1,(ri+1)*50-1]], np.float32)
        retval = cv2.getPerspectiveTransform(src,dst)
        warp = cv2.warpPerspective(res2,retval,(450,450))
        output[ri*50:(ri+1)*50-1, ci*50:(ci+1)*50-1] = warp[ri*50:(ri+1)*50-1, ci*50:(ci+1)*50-1].copy()
winname="final"
cv2.namedWindow(winname)
cv2.imshow(winname, output)
cv2.moveWindow(winname, 600,100)
cv2.waitKey(0)
cv2.destroyAllWindows()