Tag Archives: opencv

OpenCV – Depth map from uncalibrated stereo system

Question: OpenCV – Depth map from uncalibrated stereo system

I’m trying to get a depth map with an uncalibrated method. I can obtain the fundamental matrix by finding corresponding points with SIFT and then using cv2.findFundamentalMat. I then use cv2.stereoRectifyUncalibrated to get the homography matrices for each image. Finally I use cv2.warpPerspective to rectify and compute the disparity, but this doesn’t create a good depth map. The values are very high, so I’m wondering if I have to use warpPerspective or if I have to calculate a rotation matrix from the homography matrices I got with stereoRectifyUncalibrated.

I’m also not sure how the projection matrices relate to the homography matrices obtained with stereoRectifyUncalibrated when rectifying.

A part of the code:

#Obtainment of the correspondent point with SIFT
sift = cv2.SIFT()

###find the keypoints and descriptors with SIFT
kp1, des1 = sift.detectAndCompute(dst1,None)
kp2, des2 = sift.detectAndCompute(dst2,None)

###FLANN parameters
FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50)

flann = cv2.FlannBasedMatcher(index_params,search_params)
matches = flann.knnMatch(des1,des2,k=2)

good = []
pts1 = []
pts2 = []

###ratio test as per Lowe's paper
for i,(m,n) in enumerate(matches):
    if m.distance < 0.8*n.distance:
        good.append(m)
        pts2.append(kp2[m.trainIdx].pt)
        pts1.append(kp1[m.queryIdx].pt)
    
    
pts1 = np.array(pts1)
pts2 = np.array(pts2)

#Computation of the fundamental matrix
F,mask= cv2.findFundamentalMat(pts1,pts2,cv2.FM_LMEDS)


# Obtainment of the rectification matrix and use of the warpPerspective to transform them...
pts1 = pts1[:,:][mask.ravel()==1]
pts2 = pts2[:,:][mask.ravel()==1]

pts1 = np.int32(pts1)
pts2 = np.int32(pts2)

p1fNew = pts1.reshape((pts1.shape[0] * 2, 1))
p2fNew = pts2.reshape((pts2.shape[0] * 2, 1))
    
retBool ,rectmat1, rectmat2 = cv2.stereoRectifyUncalibrated(p1fNew,p2fNew,F,(2048,2048))

dst11 = cv2.warpPerspective(dst1,rectmat1,(2048,2048))
dst22 = cv2.warpPerspective(dst2,rectmat2,(2048,2048))

#calculation of the disparity
stereo = cv2.StereoBM(cv2.STEREO_BM_BASIC_PRESET,ndisparities=16*10, SADWindowSize=9)
disp = stereo.compute(dst22.astype(uint8), dst11.astype(uint8)).astype(np.float32)
plt.imshow(disp);plt.colorbar();plt.clim(0,400)#;plt.show()
plt.savefig("0gauche.png")

#plot depth by using disparity focal length `C1[0,0]` from stereo calibration and `T[0]` the distance between cameras

plt.imshow(C1[0,0]*T[0]/(disp),cmap='hot');plt.clim(-0,500);plt.colorbar();plt.show()

Here are the rectified pictures with the uncalibrated method (and warpPerspective):

Here are the rectified pictures with the calibrated method:

I don’t understand why the difference between the two kinds of pictures is so large. And for the calibrated method, the images don’t seem aligned.

The disparity map using the uncalibrated method:

The depths are calculated with C1[0,0]*T[0]/(disp), with T obtained from stereoCalibrate. The values are very high.
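
For reference, the relation being used here is depth = focal_length * baseline / disparity. A minimal sketch of applying it while guarding against zero or negative disparities (which otherwise produce huge or infinite values), assuming C1, T and disp from the code above, might look like this:

import numpy as np

# C1, T and disp are assumed to come from the code above (stereoCalibrate / StereoBM)
# note: StereoBM/StereoSGBM .compute() return fixed-point disparities scaled by 16,
# so dividing by 16.0 first usually gives disparity in pixel units
focal_px = C1[0, 0]
baseline = abs(T[0])

depth = np.full(disp.shape, np.inf, dtype=np.float32)
valid = disp > 0
depth[valid] = focal_px * baseline / disp[valid]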

———— EDIT LATER ————

I tried to “mount” the reconstruction matrix ([Devernay97], [Garcia01]) with the homography matrix obtained with “stereoRectifyUncalibrated”, but the result is still not good. Am I doing this correctly?

Y=np.arange(0,2048)
X=np.arange(0,2048)
(XX_field,YY_field)=np.meshgrid(X,Y)

#I mount the X, Y and disparity in a same 3D array 
stock = np.concatenate((np.expand_dims(XX_field,2),np.expand_dims(YY_field,2)),axis=2)
XY_disp = np.concatenate((stock,np.expand_dims(disp,2)),axis=2)

XY_disp_reshape = XY_disp.reshape(XY_disp.shape[0]*XY_disp.shape[1],3)

Ts = np.hstack((np.zeros((3,3)),T_0)) #i use only the translations obtained with the rectified calibration...Is it correct?


# I establish the projective matrix with the homography matrix
P11 = np.dot(rectmat1,C1)
P1 = np.vstack((np.hstack((P11,np.zeros((3,1)))),np.zeros((1,4))))
P1[3,3] = 1

# P1 = np.dot(C1,np.hstack((np.identity(3),np.zeros((3,1)))))

P22 = np.dot(np.dot(rectmat2,C2),Ts)
P2 = np.vstack((P22,np.zeros((1,4))))
P2[3,3] = 1

lambda_t = cv2.norm(P1[0,:].T)/cv2.norm(P2[0,:].T)


#I define the reconstruction matrix
Q = np.zeros((4,4))

Q[0,:] = P1[0,:].T
Q[1,:] = P1[1,:].T
Q[2,:] = lambda_t*P2[1,:].T - P1[1,:].T
Q[3,:] = P1[2,:].T

#I do the calculation to get my 3D coordinates
test = []
for i in range(0,XY_disp_reshape.shape[0]):
    a = np.dot(inv(Q),np.expand_dims(np.concatenate((XY_disp_reshape[i,:],np.ones((1))),axis=0),axis=1))
    test.append(a)

test = np.asarray(test)

XYZ = test[:,:,0].reshape(XY_disp.shape[0],XY_disp.shape[1],4)

Answer 0

TL;DR: Use StereoSGBM (Semi-Global Block Matching) for images with smoother edges, and use some post-filtering if you want the result smoother still.

OP didn’t provide original images, so I’m using Tsukuba from the Middlebury data set.

Result with regular StereoBM

Result with StereoSGBM (tuned)

Best result I could find in literature

See the publication here for details.

Example of post filtering (see link below)

Theory/Other considerations from OP’s question

The large black areas of your calibrated rectified images would lead me to believe that for those, calibration was not done very well. There’s a variety of reasons that could be at play, maybe the physical setup, maybe lighting when you did calibration, etc., but there are plenty of camera calibration tutorials out there for that and my understanding is that you are asking for a way to get a better depth map from an uncalibrated setup (this isn’t 100% clear, but the title seems to support this and I think that’s what people will come here to try to find).

Your basic approach is correct, but the results can definitely be improved. This form of depth mapping is not among those that produce the highest quality maps (especially being uncalibrated). The biggest improvement will likely come from using a different stereo matching algorithm. The lighting may also be having a significant effect. The right image (at least to my naked eye) appears to be less well lit which could interfere with the reconstruction. You could first try brightening it to the same level as the other, or gather new images if that is possible. From here out, I’ll assume you have no access to the original cameras, so I’ll consider gathering new images, altering the setup, or performing calibration to be out of scope. (If you do have access to the setup and cameras, then I would suggest checking calibration and using a calibrated method as this will work better).

You used StereoBM for calculating your disparity (depth map) which does work, but StereoSGBM is much better suited for this application (it handles smoother edges better). You can see the difference below.

This article explains the differences in more depth:

Block matching focuses on high texture images (think a picture of a tree) and semi-global block matching will focus on sub pixel level matching and pictures with more smooth textures (think a picture of a hallway).

Without any explicit intrinsic camera parameters, specifics about the camera setup (like focal distance, distance between the cameras, distance to the subject, etc.), a known dimension in the image, or motion (to use structure from motion), you can only obtain 3D reconstruction up to a projective transform; you won’t have a sense of scale or necessarily rotation either, but you can still generate a relative depth map. You will likely suffer from some barrel and other distortions which could be removed with proper camera calibration, but you can get reasonable results without it as long as the cameras aren’t terrible (lens system isn’t too distorted) and are set up pretty close to canonical configuration (which basically means they are oriented such that their optical axes are as close to parallel as possible, and their fields of view overlap sufficiently). This doesn’t, however, appear to be the OP’s issue, as he did manage to get decent rectified images with the uncalibrated method.

Basic Procedure

  1. Find at least 8 well-matched points in both images you can use to calculate the Fundamental Matrix (you can use any detector and matcher you like, I kept FLANN but used ORB to do detection as SIFT isn’t in the main version of OpenCV for 4.2.0)
  2. Calculate the Fundamental Matrix, F, with findFundamentalMat
  3. Undistort your images with stereoRectifyUncalibrated and warpPerspective
  4. Calculate Disparity (Depth Map) with StereoSGBM

The results are much better:

Matches with ORB and FLANN

Undistorted images (left, then right)


Disparity

StereoBM

This result looks similar to the OP’s problems (speckling, gaps, wrong depths in some areas).

StereoSGBM (tuned)

This result looks much better and uses roughly the same method as the OP, minus the final disparity calculation, making me think the OP would see similar improvements on his images, had they been provided.

Post filtering

There’s a good article about this in the OpenCV docs. I’d recommend looking at it if you need really smooth maps.

The example photos above are frame 1 from the scene ambush_2 in the MPI Sintel Dataset.
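
As a rough illustration of that kind of post-filtering (a sketch, not taken from the linked article), a WLS disparity filter can be applied on top of the SGBM matcher; this assumes the opencv-contrib-python package, which provides cv2.ximgproc, and reuses the imgL/imgR grayscale images from the full code below:

import cv2

# Requires opencv-contrib-python for cv2.ximgproc; imgL/imgR are the grayscale left/right images
left_matcher = cv2.StereoSGBM_create(minDisparity=0, numDisparities=64, blockSize=5)
right_matcher = cv2.ximgproc.createRightMatcher(left_matcher)

wls_filter = cv2.ximgproc.createDisparityWLSFilter(left_matcher)
wls_filter.setLambda(8000.0)   # amount of smoothing
wls_filter.setSigmaColor(1.5)  # sensitivity to edges in the guidance image

disp_left = left_matcher.compute(imgL, imgR)
disp_right = right_matcher.compute(imgR, imgL)

# filter the left disparity map, guided by the left image and the right disparity map
filtered_disp = wls_filter.filter(disp_left, imgL, None, disp_right)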

Full code (Tested on OpenCV 4.2.0):

import cv2
import numpy as np
import matplotlib.pyplot as plt

imgL = cv2.imread("tsukuba_l.png", cv2.IMREAD_GRAYSCALE)  # left image
imgR = cv2.imread("tsukuba_r.png", cv2.IMREAD_GRAYSCALE)  # right image


def get_keypoints_and_descriptors(imgL, imgR):
    """Use ORB detector and FLANN matcher to get keypoints, descritpors,
    and corresponding matches that will be good for computing
    homography.
    """
    orb = cv2.ORB_create()
    kp1, des1 = orb.detectAndCompute(imgL, None)
    kp2, des2 = orb.detectAndCompute(imgR, None)

    ############## Using FLANN matcher ##############
    # Each keypoint of the first image is matched with a number of
    # keypoints from the second image. k=2 means keep the 2 best matches
    # for each keypoint (best matches = the ones with the smallest
    # distance measurement).
    FLANN_INDEX_LSH = 6
    index_params = dict(
        algorithm=FLANN_INDEX_LSH,
        table_number=6,  # 12
        key_size=12,  # 20
        multi_probe_level=1,
    )  # 2
    search_params = dict(checks=50)  # or pass empty dictionary
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    flann_match_pairs = flann.knnMatch(des1, des2, k=2)
    return kp1, des1, kp2, des2, flann_match_pairs


def lowes_ratio_test(matches, ratio_threshold=0.6):
    """Filter matches using the Lowe's ratio test.

    The ratio test checks if matches are ambiguous and should be
    removed by checking that the two distances are sufficiently
    different. If they are not, then the match at that keypoint is
    ignored.

    https://stackoverflow.com/questions/51197091/how-does-the-lowes-ratio-test-work
    """
    filtered_matches = []
    for m, n in matches:
        if m.distance < ratio_threshold * n.distance:
            filtered_matches.append(m)
    return filtered_matches


def draw_matches(imgL, imgR, kp1, des1, kp2, des2, flann_match_pairs):
    """Draw the first 8 mathces between the left and right images."""
    # https://docs.opencv.org/4.2.0/d4/d5d/group__features2d__draw.html
    # https://docs.opencv.org/2.4/modules/features2d/doc/common_interfaces_of_descriptor_matchers.html
    img = cv2.drawMatches(
        imgL,
        kp1,
        imgR,
        kp2,
        flann_match_pairs[:8],
        None,
        flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
    )
    cv2.imshow("Matches", img)
    cv2.imwrite("ORB_FLANN_Matches.png", img)
    cv2.waitKey(0)


def compute_fundamental_matrix(matches, kp1, kp2, method=cv2.FM_RANSAC):
    """Use the set of good mathces to estimate the Fundamental Matrix.

    See  https://en.wikipedia.org/wiki/Eight-point_algorithm#The_normalized_eight-point_algorithm
    for more info.
    """
    pts1, pts2 = [], []
    fundamental_matrix, inliers = None, None
    for m in matches[:8]:
        pts1.append(kp1[m.queryIdx].pt)
        pts2.append(kp2[m.trainIdx].pt)
    if pts1 and pts2:
        # You can play with the Threshold and confidence values here
        # until you get something that gives you reasonable results. I
        # used the defaults
        fundamental_matrix, inliers = cv2.findFundamentalMat(
            np.float32(pts1),
            np.float32(pts2),
            method=method,
            # ransacReprojThreshold=3,
            # confidence=0.99,
        )
    return fundamental_matrix, inliers, pts1, pts2


############## Find good keypoints to use ##############
kp1, des1, kp2, des2, flann_match_pairs = get_keypoints_and_descriptors(imgL, imgR)
good_matches = lowes_ratio_test(flann_match_pairs, 0.2)
draw_matches(imgL, imgR, kp1, des1, kp2, des2, good_matches)


############## Compute Fundamental Matrix ##############
F, I, points1, points2 = compute_fundamental_matrix(good_matches, kp1, kp2)


############## Stereo rectify uncalibrated ##############
h1, w1 = imgL.shape
h2, w2 = imgR.shape
thresh = 0
_, H1, H2 = cv2.stereoRectifyUncalibrated(
    np.float32(points1), np.float32(points2), F, imgSize=(w1, h1), threshold=thresh,
)

############## Undistort (Rectify) ##############
imgL_undistorted = cv2.warpPerspective(imgL, H1, (w1, h1))
imgR_undistorted = cv2.warpPerspective(imgR, H2, (w2, h2))
cv2.imwrite("undistorted_L.png", imgL_undistorted)
cv2.imwrite("undistorted_R.png", imgR_undistorted)

############## Calculate Disparity (Depth Map) ##############

# Using StereoBM
stereo = cv2.StereoBM_create(numDisparities=16, blockSize=15)
disparity_BM = stereo.compute(imgL_undistorted, imgR_undistorted)
plt.imshow(disparity_BM, "gray")
plt.colorbar()
plt.show()

# Using StereoSGBM
# Set disparity parameters. Note: disparity range is tuned according to
#  specific parameters obtained through trial and error.
win_size = 2
min_disp = -4
max_disp = 9
num_disp = max_disp - min_disp  # Needs to be divisible by 16
stereo = cv2.StereoSGBM_create(
    minDisparity=min_disp,
    numDisparities=num_disp,
    blockSize=5,
    uniquenessRatio=5,
    speckleWindowSize=5,
    speckleRange=5,
    disp12MaxDiff=2,
    P1=8 * 3 * win_size ** 2,
    P2=32 * 3 * win_size ** 2,
)
disparity_SGBM = stereo.compute(imgL_undistorted, imgR_undistorted)
plt.imshow(disparity_SGBM, "gray")
plt.colorbar()
plt.show()


Answer 1

There might be several possible issues resulting in a low-quality Depth Channel and Disparity Channel, which in turn lead to a low-quality stereo sequence. Here are 6 of those issues:

Possible issue I

  • Incomplete Formula

As the word uncalibrated implies, the stereoRectifyUncalibrated method computes rectification transformations for you when you don’t know, or can’t know, the intrinsic parameters of your stereo pair and its relative position in the environment.

cv.StereoRectifyUncalibrated(pts1, pts2, fm, imgSize, rhm1, rhm2, thres)

where:

# pts1    –> an array of feature points in the first camera
# pts2    –> an array of feature points in the second camera
# fm      –> input fundamental matrix
# imgSize -> size of an image
# rhm1    -> output rectification homography matrix for a first image
# rhm2    -> output rectification homography matrix for a second image
# thres   –> optional threshold used to filter out outliers

And your method looks this way:

cv2.StereoRectifyUncalibrated(p1fNew, p2fNew, F, (2048, 2048))

So, you do not take into account three parameters: rhm1, rhm2 and thres. If the threshold > 0, all point pairs that don’t comply with the epipolar geometry are rejected prior to computing the homographies. Otherwise, all points are considered inliers. The check looks like this:

|pts2[i]^T * fm * pts1[i]| > thres

# ^T  –> transpose (each point pair is tested against the epipolar constraint)

Thus, I believe that visual inaccuracies might appear because this outlier filtering is never applied.

You can read Camera Calibration and 3D Reconstruction in the official documentation.
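
For reference, a minimal sketch of how the threshold can be passed through the Python binding, reusing the point arrays and fundamental matrix from the question (the value 3 is just an illustrative choice):

import cv2
import numpy as np

# a threshold > 0 makes stereoRectifyUncalibrated reject outliers before computing the homographies
retval, rhm1, rhm2 = cv2.stereoRectifyUncalibrated(
    np.float32(p1fNew), np.float32(p2fNew), F, imgSize=(2048, 2048), threshold=3
)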


Possible issue II

  • Interaxial Distance

A robust interaxial distance between the left and right camera lenses must not be greater than 200 mm. When the interaxial distance is larger than the interocular distance, the effect is called hyperstereoscopy or hyperdivergence and results not only in depth exaggeration in the scene but also in physical discomfort for the viewer. Read Autodesk’s Stereoscopic Filmmaking Whitepaper to find out more on this topic.


Possible issue III

  • Parallel vs Toed-In camera mode

Visual inaccuracies in the resulting Disparity Map may occur due to an incorrect camera mode. Many stereographers prefer the Toe-In camera mode, but Pixar, for example, prefers the Parallel camera mode.


Possible issue IV

  • Vertical Alignment

In stereoscopy, if a vertical shift occurs (even if one of the views is shifted up by 1 mm) it ruins a robust stereo experience. So, before generating a Disparity Map you must be sure that the left and right views of your stereo pair are properly aligned. Look at the Technicolor Stereoscopic Whitepaper about 15 common problems in stereo.

Stereo Rectification Matrix:

   ┌                  ┐
   |  f   0   cx  tx  |
   |  0   f   cy  ty  |   # use "ty" value to fix vertical shift in one image
   |  0   0   1   0   |
   └                  ┘

Here’s a StereoRectify method:

cv.StereoRectify(cameraMatrix1, cameraMatrix2, distCoeffs1, distCoeffs2, imageSize, R, T, R1, R2, P1, P2, Q=None, flags=CV_CALIB_ZERO_DISPARITY, alpha=-1, newImageSize=(0, 0)) -> (roi1, roi2)
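
That is the old C-style signature; in the cv2 Python binding the equivalent call, shown here as a sketch with the calibration outputs (cameraMatrix1/2, distCoeffs1/2, R, T) assumed to come from cv2.stereoCalibrate, looks roughly like this:

import cv2

R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(
    cameraMatrix1, distCoeffs1,
    cameraMatrix2, distCoeffs2,
    imageSize, R, T,
    flags=cv2.CALIB_ZERO_DISPARITY,
    alpha=-1,
)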


Possible issue V

  • Lens Distortion

Lens distortion is a very important topic in stereo composition. Before generating a Disparity Map you need to undistort the left and right views, then generate the disparity channel, and then re-distort both views again.
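
A minimal sketch of that undistortion step, assuming a camera matrix and distortion coefficients are available from a calibration (the variable names here are placeholders, not part of the answer above):

import cv2

# cameraMatrix and distCoeffs would normally come from cv2.calibrateCamera
undistorted_left = cv2.undistort(imageLeft, cameraMatrix, distCoeffs)
undistorted_right = cv2.undistort(imageRight, cameraMatrix, distCoeffs)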


Possible issue VI

  • Low-quality Depth channel without anti-aliasing

For creating a high-quality Disparity Map you need left and right Depth Channels that must be pre-generated. When you work in a 3D package you can render a high-quality Depth Channel (with crisp edges) with just one click. But generating a high-quality depth channel from a video sequence is not easy, because the stereo pair has to move in your environment to produce the initial data for a future depth-from-motion algorithm. If there is no motion in a frame, the depth channel will be extremely poor.

Also, the Depth channel itself has one more drawback: its edges do not match the edges of the RGB image because it has no anti-aliasing.


Disparity channel code snippet:

Here I’d like to present a quick approach to generating a Disparity Map:

import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt

imageLeft = cv.imread('paris_left.png', 0)
imageRight = cv.imread('paris_right.png', 0)
stereo = cv.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(imageLeft, imageRight)
plt.imshow(disparity, 'gray')
plt.show()


Python – Extracting and saving video frames

Question: Python – Extracting and saving video frames

So I’ve followed this tutorial but it doesn’t seem to do anything. Simply nothing. It waits a few seconds and closes the program. What is wrong with this code?

import cv2
vidcap = cv2.VideoCapture('Compton.mp4')
success,image = vidcap.read()
count = 0
success = True
while success:
  success,image = vidcap.read()
  cv2.imwrite("frame%d.jpg" % count, image)     # save frame as JPEG file
  if cv2.waitKey(10) == 27:                     # exit if Escape is hit
      break
  count += 1

Also, in the comments it says that this limits the frames to 1000? Why?

EDIT: I tried doing success = True first but that didn’t help. It only created one image that was 0 bytes.


Answer 0

From here, download this video so we have the same video file for the test. Make sure the mp4 file is in the same directory as your Python code. Then also make sure to run the Python interpreter from the same directory.

Then modify the code: ditch waitKey, which is wasting time, and without a window it cannot capture keyboard events anyway. Also, we print the success value to make sure it’s reading the frames successfully.

import cv2
vidcap = cv2.VideoCapture('big_buck_bunny_720p_5mb.mp4')
success,image = vidcap.read()
count = 0
while success:
  cv2.imwrite("frame%d.jpg" % count, image)     # save frame as JPEG file      
  success,image = vidcap.read()
  print('Read a new frame: ', success)
  count += 1

How does that go?


Answer 1

To extend this question (and @user2700065’s answer) to a slightly different case: if you do not want to extract every frame, but only one frame every second, then a 1-minute video will give 60 frames (images).

import sys
import argparse

import cv2
print(cv2.__version__)

def extractImages(pathIn, pathOut):
    count = 0
    vidcap = cv2.VideoCapture(pathIn)
    success,image = vidcap.read()
    success = True
    while success:
        vidcap.set(cv2.CAP_PROP_POS_MSEC,(count*1000))    # added this line 
        success,image = vidcap.read()
        print ('Read a new frame: ', success)
        cv2.imwrite( pathOut + "\\frame%d.jpg" % count, image)     # save frame as JPEG file
        count = count + 1

if __name__=="__main__":
    a = argparse.ArgumentParser()
    a.add_argument("--pathIn", help="path to video")
    a.add_argument("--pathOut", help="path to images")
    args = a.parse_args()
    print(args)
    extractImages(args.pathIn, args.pathOut)

Answer 2

This is a tweak of the previous Python 3.x answer from @GShocked. I would post it as a comment, but I don’t have enough reputation.

import sys
import argparse

import cv2
print(cv2.__version__)

def extractImages(pathIn, pathOut):
    vidcap = cv2.VideoCapture(pathIn)
    success,image = vidcap.read()
    count = 0
    success = True
    while success:
      success,image = vidcap.read()
      print ('Read a new frame: ', success)
      cv2.imwrite( pathOut + "\\frame%d.jpg" % count, image)     # save frame as JPEG file
      count += 1

if __name__=="__main__":
    print("aba")
    a = argparse.ArgumentParser()
    a.add_argument("--pathIn", help="path to video")
    a.add_argument("--pathOut", help="path to images")
    args = a.parse_args()
    print(args)
    extractImages(args.pathIn, args.pathOut)

Answer 3

This is a function which will convert most video formats into the individual frames they contain. It works on Python 3 with OpenCV 3+.

import cv2
import time
import os

def video_to_frames(input_loc, output_loc):
    """Function to extract frames from input video file
    and save them as separate frames in an output directory.
    Args:
        input_loc: Input video file.
        output_loc: Output directory to save the frames.
    Returns:
        None
    """
    try:
        os.mkdir(output_loc)
    except OSError:
        pass
    # Log the time
    time_start = time.time()
    # Start capturing the feed
    cap = cv2.VideoCapture(input_loc)
    # Find the number of frames
    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
    print ("Number of frames: ", video_length)
    count = 0
    print ("Converting video..\n")
    # Start converting the video
    while cap.isOpened():
        # Extract the frame
        ret, frame = cap.read()
        # Write the results back to output location.
        cv2.imwrite(output_loc + "/%#05d.jpg" % (count+1), frame)
        count = count + 1
        # If there are no more frames left
        if (count > (video_length-1)):
            # Log the time again
            time_end = time.time()
            # Release the feed
            cap.release()
            # Print stats
            print ("Done extracting frames.\n%d frames extracted" % count)
            print ("It took %d seconds forconversion." % (time_end-time_start))
            break

if __name__=="__main__":

    input_loc = '/path/to/video/00009.MTS'
    output_loc = '/path/to/output/frames/'
    video_to_frames(input_loc, output_loc)

It supports .mts and normal files like .mp4 and .avi. Tried and tested on .mts files. Works like a charm.


Answer 4

After a lot of research on how to convert frames to video, I have created this function; hope this helps. We require OpenCV for this:

import cv2
import numpy as np
import os
from os.path import isfile, join

def frames_to_video(inputpath,outputpath,fps):
   image_array = []
   files = [f for f in os.listdir(inputpath) if isfile(join(inputpath, f))]
   files.sort(key = lambda x: int(x[5:-4]))
   for i in range(len(files)):
       img = cv2.imread(inputpath + files[i])
       size =  (img.shape[1],img.shape[0])
       img = cv2.resize(img,size)
       image_array.append(img)
   fourcc = cv2.VideoWriter_fourcc('D', 'I', 'V', 'X')
   out = cv2.VideoWriter(outputpath,fourcc, fps, size)
   for i in range(len(image_array)):
       out.write(image_array[i])
   out.release()


inputpath = 'folder path'
outpath =  'video file path/video.mp4'
fps = 29
frames_to_video(inputpath,outpath,fps)

Change the value of fps (frames per second), the input folder path and the output folder path according to your own local locations.


Answer 5

The previous answers lose the first frame. Also, it is nice to store the images in a folder.

# create a folder to store extracted images
import os
folder = 'test'  
os.mkdir(folder)
# use opencv to do the job
import cv2
print(cv2.__version__)  # my version is 3.1.0
vidcap = cv2.VideoCapture('test_video.mp4')
count = 0
while True:
    success,image = vidcap.read()
    if not success:
        break
    cv2.imwrite(os.path.join(folder,"frame{:d}.jpg".format(count)), image)     # save frame as JPEG file
    count += 1
print("{} images are extacted in {}.".format(count,folder))

By the way, you can check the frame rate with VLC. Go to Window -> Media Information -> Codec Details.
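
If you prefer to read the frame rate from code instead of VLC, OpenCV exposes it as a capture property (a small sketch; the reported values depend on the container and may be approximate):

import cv2

vidcap = cv2.VideoCapture('test_video.mp4')
fps = vidcap.get(cv2.CAP_PROP_FPS)                  # frames per second
frame_count = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)  # total number of frames
print("fps: {}, frames: {}".format(fps, frame_count))
vidcap.release()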


Answer 6

This code extracts frames from the video and saves them in .jpg format.

import cv2
import numpy as np
import os

# set video file path of input video with name and extension
vid = cv2.VideoCapture('VideoPath')


if not os.path.exists('images'):
    os.makedirs('images')

#for frame identity
index = 0
while(True):
    # Extract images
    ret, frame = vid.read()
    # end of frames
    if not ret: 
        break
    # Saves images
    name = './images/frame' + str(index) + '.jpg'
    print ('Creating...' + name)
    cv2.imwrite(name, frame)

    # next frame
    index += 1

Answer 7

I am using Python via Anaconda’s Spyder software. Using the original code listed in the question of this thread by @Gshocked, the code does not work (Python won’t read the mp4 file). So I downloaded OpenCV 3.2 and copied “opencv_ffmpeg320.dll” and “opencv_ffmpeg320_64.dll” from the “bin” folder. I pasted both of these dll files into Anaconda’s “DLLs” folder.

Anaconda also has a “pckgs” folder…I copied and pasted the entire “OpenCV 3.2” folder that I downloaded to the Anaconda “pckgs” folder.

Finally, Anaconda has a “Library” folder which has a “bin” subfolder. I pasted the “opencv_ffmpeg320.dll” and “opencv_ffmpeg320_64.dll” files to that folder.

After closing and restarting Spyder, the code worked. I’m not sure which of the three methods worked, and I’m too lazy to go back and figure it out. But it works, so cheers!


Answer 8

This function extracts images from the video at 1 fps; in addition, it identifies the last frame and stops reading:

import cv2
import numpy as np

def extract_image_one_fps(video_source_path):

    vidcap = cv2.VideoCapture(video_source_path)
    count = 0
    success = True
    while success:
      vidcap.set(cv2.CAP_PROP_POS_MSEC,(count*1000))      
      success,image = vidcap.read()

      ## Stop when last frame is identified
      image_last = cv2.imread("frame{}.png".format(count-1))
      if np.array_equal(image,image_last):
          break

      cv2.imwrite("frame%d.png" % count, image)     # save frame as PNG file
      print '{}.sec reading a new frame: {} '.format(count,success)
      count += 1

Answer 9

The following script will extract frames every half second from all videos in a folder. (Works on Python 3.7)

import cv2
import os
listing = os.listdir(r'D:/Images/AllVideos')
count=1
for vid in listing:
    vid = r"D:/Images/AllVideos/"+vid
    vidcap = cv2.VideoCapture(vid)
    def getFrame(sec):
        vidcap.set(cv2.CAP_PROP_POS_MSEC,sec*1000)
        hasFrames,image = vidcap.read()
        if hasFrames:
            cv2.imwrite("D:/Images/Frames/image"+str(count)+".jpg", image) # Save frame as JPG file
        return hasFrames
    sec = 0
    frameRate = 0.5 # Change this number to 1 for each 1 second
    
    success = getFrame(sec)
    while success:
        count = count + 1
        sec = sec + frameRate
        sec = round(sec, 2)
        success = getFrame(sec)

How do I convert an RGB image into a numpy array?

Question: How do I convert an RGB image into a numpy array?

I have an RGB image. I want to convert it to a numpy array. I did the following:

im = cv.LoadImage("abc.tiff")
a = numpy.asarray(im)

It creates an array with no shape. I assume it is an iplimage object.


Answer 0

You can use the newer OpenCV Python interface (if I’m not mistaken, it has been available since OpenCV 2.2). It natively uses numpy arrays:

import cv2
im = cv2.imread("abc.tiff",mode='RGB')
print type(im)

result:

<type 'numpy.ndarray'>

Answer 1

PIL (Python Imaging Library) and Numpy work well together.

I use the following functions.

from PIL import Image
import numpy as np

def load_image( infilename ) :
    img = Image.open( infilename )
    img.load()
    data = np.asarray( img, dtype="int32" )
    return data

def save_image( npdata, outfilename ) :
    img = Image.fromarray( np.asarray( np.clip(npdata,0,255), dtype="uint8"), "L" )
    img.save( outfilename )

The ‘Image.fromarray’ is a little ugly because I clip incoming data to [0,255], convert to bytes, then create a grayscale image. I mostly work in gray.

An RGB image would be something like:

 outimg = Image.fromarray( ycc_uint8, "RGB" )
 outimg.save( "ycc.tif" )

Answer 2

You can also use matplotlib for this.

from matplotlib.image import imread

img = imread('abc.tiff')
print(type(img))

output: <class 'numpy.ndarray'>


Answer 3

As of today, your best bet is to use:

img = cv2.imread(image_path)   # reads an image in the BGR format
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # BGR -> RGB

You’ll see img will be a numpy array of type:

<class 'numpy.ndarray'>

Answer 4

Late answer, but I’ve come to prefer the imageio module to the other alternatives

import imageio
im = imageio.imread('abc.tiff')

Similar to cv2.imread(), it produces a numpy array by default, but in RGB form.


Answer 5

You need to use cv.LoadImageM instead of cv.LoadImage:

In [1]: import cv
In [2]: import numpy as np
In [3]: x = cv.LoadImageM('im.tif')
In [4]: im = np.asarray(x)
In [5]: im.shape
Out[5]: (487, 650, 3)

Answer 6

When using the answer from David Poole I get a SystemError with grayscale PNGs and maybe other files. My solution is:

import numpy as np
from PIL import Image

img = Image.open( filename )
try:
    data = np.asarray( img, dtype='uint8' )
except SystemError:
    data = np.asarray( img.getdata(), dtype='uint8' )

Actually img.getdata() would work for all files, but it’s slower, so I use it only when the other method fails.


Answer 7

OpenCV image format supports the numpy array interface. A helper function can be made to support either grayscale or color images. This means the BGR -> RGB conversion can be conveniently done with a numpy slice, not a full copy of image data.

Note: this is a stride trick, so modifying the output array will also change the OpenCV image data. If you want a copy, use .copy() method on the array!

import numpy as np

def img_as_array(im):
    """OpenCV's native format to a numpy array view"""
    w, h, n = im.width, im.height, im.channels
    modes = {1: "L", 3: "RGB", 4: "RGBA"}
    if n not in modes:
        raise Exception('unsupported number of channels: {0}'.format(n))
    out = np.asarray(im)
    if n != 1:
        out = out[:, :, ::-1]  # BGR -> RGB conversion
    return out

Answer 8

I also adopted imageio, but I found the following machinery useful for pre- and post-processing:

import imageio
import numpy as np

def imload(*a, **k):
    i = imageio.imread(*a, **k)
    i = i.transpose((1, 0, 2))  # x and y are mixed up for some reason...
    i = np.flip(i, 1)  # make coordinate system right-handed!!!!!!
    return i/255


def imsave(i, url, *a, **k):
    # Original order of arguments was counterintuitive. It should
    # read verbally "Save the image to the URL" — not "Save to the
    # URL the image."

    i = np.flip(i, 1)
    i = i.transpose((1, 0, 2))
    i *= 255

    i = i.round()
    i = np.maximum(i, 0)
    i = np.minimum(i, 255)

    i = np.asarray(i, dtype=np.uint8)

    imageio.imwrite(url, i, *a, **k)

The rationale is that I am using numpy for image processing, not just image displaying. For this purpose, uint8s are awkward, so I convert to floating point values ranging from 0 to 1.

When saving images, I noticed I had to cut the out-of-range values myself, or else I ended up with a really gray output. (The gray output was the result of imageio compressing the full range, which was outside of [0, 256), to values that were inside the range.)

There were a couple other oddities, too, which I mentioned in the comments.


回答 9

您可以使用 numpy 和 PIL 的 Image 轻松获得 RGB 图片的 numpy 数组

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

im = Image.open('*image_name*') #These two lines
im_arr = np.array(im) #are all you need
plt.imshow(im_arr) #Just to verify that image array has been constructed properly

You can get numpy array of rgb image easily by using numpy and Image from PIL

import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

im = Image.open('*image_name*') #These two lines
im_arr = np.array(im) #are all you need
plt.imshow(im_arr) #Just to verify that image array has been constructed properly

回答 10

使用以下语法加载图像:

from keras.preprocessing import image

X_test=image.load_img('four.png',target_size=(28,28),color_mode="grayscale"); #loading image and then convert it into grayscale and with it's target size 
X_test=image.img_to_array(X_test); #convert image into array

Load the image using the following syntax:

from keras.preprocessing import image

X_test=image.load_img('four.png',target_size=(28,28),color_mode="grayscale"); #loading image and then convert it into grayscale and with it's target size 
X_test=image.img_to_array(X_test); #convert image into array

ImportError:DLL加载失败:%1不是有效的Win32应用程序。但是DLL在那里

问题:ImportError:DLL加载失败:%1不是有效的Win32应用程序。但是DLL在那里

我的情况非常类似于ImportError上的情况:DLL加载失败:%1不是有效的Win32应用程序,但是答案对我不起作用。

我的Python代码说:

import cv2

但是该行引发了此问题标题中显示的错误。

C:\lib\opencv在这台64位计算机上安装了OpenCV 。我正在使用64位Python。

我的 PYTHONPATH 变量:PYTHONPATH=C:\lib\opencv\build\python\2.7。该文件夹只包含 cv2.pyd,仅此而已。

我的PATH变量:Path=%OPENCV_DIR%\bin;...此文件夹包含39个DLL文件,例如opencv_core246d.dll

OPENCV_DIR具有以下值:OPENCV_DIR=C:\lib\opencv\build\x64\vc11

ImportError: DLL load failed: %1 is not a valid Win32 application 上的解决方案说,要把新的 opencv 二进制路径(C:\opencv\build\bin\Release)添加到 Windows PATH 环境变量。但是,如上所示,我的 PATH 中已经有 OpenCV 二进制文件夹(C:\lib\opencv\build\x64\vc11\bin)。而且我的 OpenCV 安装没有任何 Release 文件夹(只有 build/java 下有一个空文件夹)。

关于出什么问题有什么想法吗?我可以告诉Python详细跟踪加载过程吗?究竟要寻找什么DLL?

谢谢,拉斯

编辑:

我刚刚注意到,根据 http://www.dependencywalker.com/,C:\lib\opencv\build\python\2.7 中的 cv2.pyd 是 32 位的,而我运行的机器和 Python 都是 64 位的。这可能就是问题所在吗?如果是,我在哪里可以找到 64 位版本的 cv2.pyd?

I have a situation very much like the one at ImportError: DLL load failed: %1 is not a valid Win32 application, but the answer there isn’t working for me.

My Python code says:

import cv2

But that line throws the error shown in the title of this question.

I have OpenCV installed in C:\lib\opencv on this 64-bit machine. I’m using 64-bit Python.

My PYTHONPATH variable: PYTHONPATH=C:\lib\opencv\build\python\2.7. This folder contains cv2.pyd and that’s all.

My PATH variable: Path=%OPENCV_DIR%\bin;... This folder contains 39 DLL files such as opencv_core246d.dll.

OPENCV_DIR has this value: OPENCV_DIR=C:\lib\opencv\build\x64\vc11.

The solution at ImportError: DLL load failed: %1 is not a valid Win32 application says to add “the new opencv binaries path (C:\opencv\build\bin\Release) to the Windows PATH environment variable”. But as shown above, I already have the OpenCV binaries folder (C:\lib\opencv\build\x64\vc11\bin) in my PATH. And my OpenCV installation doesn’t have any Release folders (except for an empty one under build/java).

Any ideas as to what’s going wrong? Can I tell Python to verbosely trace the loading process? Exactly what DLL’s is it looking for?

Thanks, Lars

EDIT:

I just noticed that, according to http://www.dependencywalker.com/, the cv2.pyd in C:\lib\opencv\build\python\2.7 is 32-bit, whereas the machine and the Python I’m running are 64-bit. Could that be the problem? And if so, where can I find a 64-bit version of cv2.pyd?


回答 0


回答 1

请检查您使用的 python 版本是否也是 64 位。如果不是,那可能就是问题所在:您使用的是 32 位的 python 版本,却为 OPENCV 库安装了 64 位的二进制文件。

Please check if the python version you are using is also 64 bit. If not then that could be the issue. You would be using a 32 bit python version and would have installed a 64 bit binaries for the OPENCV library.
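
If in doubt, the bitness of the interpreter itself can be checked from Python; a quick sketch (both lines report the pointer size of the running interpreter):

import struct
import platform

print(struct.calcsize("P") * 8)    # 32 or 64: pointer size of this interpreter, in bits
print(platform.architecture()[0])  # e.g. '64bit'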


回答 2

哇,我发现了这个问题的另一种情况。以上都不起作用。最终,我使用python的功能来内省正在加载的内容。对于python 2.7,这意味着:

import imp
imp.find_module("cv2")

这在 Anaconda 的 DLL 目录中发现了一个完全出乎意料的“cv2.pyd”文件,之前多次卸载/重装都没有动过它。Python 会先在那里查找,因而找不到我装好的那个版本。我删除了那个 cv2.pyd 文件,再次运行 imp.find_module("cv2"),python 立即找到了正确的文件,cv2 也开始正常工作了。

因此,如果没有其他解决方案对您有用,请确保您使用python内省来查看python尝试加载的文件。

Wow, I found yet another case for this problem. None of the above worked. Eventually I used python’s ability to introspect what was being loaded. For python 2.7 this means:

import imp
imp.find_module("cv2")

This turned up a completely unexpected “cv2.pyd” file in an Anaconda DLL directory that wasn’t touched by multiple uninstall/install attempts. Python was looking there first and not finding my good installation. I deleted that cv2.pyd file and tried imp.find_module(“cv2”) again and python immediately found the right file and cv2 started working.

So if none of the other solutions work for you, make sure you use python introspection to see what file python is trying to load.
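
For what it's worth, on Python 3 the imp module is deprecated; the same kind of introspection can be done with importlib (a rough sketch):

import importlib.util

spec = importlib.util.find_spec("cv2")
# spec.origin is the path of the file Python would actually load for "import cv2"
print(spec.origin if spec is not None else "cv2 not found")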


回答 3

就我而言,我有64位python,而lxml是错误的版本-我也应该一直使用x64版本。我通过在此处下载lxml的64位版本来解决此问题:

https://pypi.python.org/pypi/lxml/3.4.1

lxml-3.4.1.win-amd64-py2.7.exe

这是一个令人沮丧的问题的最简单答案。

In my case, I have 64bit python, and it was lxml that was the wrong version–I should have been using the x64 version of that as well. I solved this by downloading the 64-bit version of lxml here:

https://pypi.python.org/pypi/lxml/3.4.1

lxml-3.4.1.win-amd64-py2.7.exe

This was the simplest answer to a frustrating issue.


回答 4

我只是遇到了这个问题,原来是因为我使用的是x64版本的opencv文件。尝试了x86,它起作用了。

I just had this problem, it turns it was just because I was using x64 version of the opencv file. Tried the x86 and it worked.


回答 5

如果您的构建系统(在我的情况下为 CMake)把文件从 <name>.dll 复制为 <name>.pyd,而原始文件实际上并不是 dll,就会出现此错误。就我而言,构建共享库的选项被关闭了,因此底层文件实际上是一个 *.lib。

我通过将pyd文件加载到DependencyWalker中并发现它无效而发现了此错误。

If your build-system (CMake in my case) copies the file from <name>.dll to <name>.pyd, you will get this error if the original file wasn’t actually a dll. In my case, building shared libraries got switched off, so the underlying file was actually a *.lib.

I discovered this error by loading the pyd file in DependencyWalker and finding that it wasn’t valid.


回答 6

我有同样的问题。这是我所做的:

  1. 我从这里下载pywin32 Wheel文件,然后

  2. 我卸载了pywin32模块。要卸载,请在命令提示符中执行以下命令。

    pip uninstall pywin32

  3. 然后,我重新安装了pywin32。要安装它,请在pywin32 wheel文件所在的目录中打开命令提示符。然后执行以下命令。

    pip install <Name of the wheel file with extension> wheel 文件名将类似于:pywin32-XXX-cpXX-none-win32.whl

它为我解决了问题。您可能还想尝试一下。希望它也对您有用。

I had the same problem. Here’s what I did:

  1. I downloaded pywin32 Wheel file from here, then

  2. I uninstalled the pywin32 module. To uninstall execute the following command in Command Prompt.

    pip uninstall pywin32

  3. Then, I reinstalled pywin32. To install it, open the Command Prompt in the same directory where the pywin32 wheel file lies. Then execute the following command.

    pip install <Name of the wheel file with extension> Wheel file will be like: pywin32-XXX-cpXX-none-win32.whl

It solved the problem for me. You may also like to give it a try. Hope it works for you as well.


回答 7

我把 cv2.pyd 文件从 /opencv/build/python/2.7/x86 文件夹(而不是 /x64 文件夹)复制到了 C:/Python27/Lib/site-packages。然后我按照此处提供的其余说明进行了操作。

由其他人补充,未经验证:我还把 cv2.pyd 文件复制到了 C:/Python27/Lib/site-packages/cv2 文件夹。这样就可以用了。

I copied the cv2.pyd file from the /opencv/build/python/2.7/x86 folder instead of from the /x64 folder to C:/Python27/Lib/site-packages. I followed the rest of the instructions provided here.

Added by someone else, not verified: I also copied the file cv2.pyd to the folder C:/Python27/Lib/site-packages/cv2. It works.


回答 8

对我来说,问题是我在同一个 Eclipse项目中使用了不同版本的Python 。我的设置与“ 项目属性”和“ 运行配置” Python版本不一致。

项目>属性> PyDev中,将解释器设置为Python2.7.11。

“运行配置”>“解释器”中,我正在使用“默认解释器”。将其更改为Python 2.7.11可解决此问题。

For me the problem was that I was using different versions of Python in the same Eclipse project. My setup was not consistent with the Project Properties and the Run Configuration Python versions.

In Project > Properties > PyDev, I had the Interpreter set to Python2.7.11.

In Run Configurations > Interpreter, I was using the Default Interpreter. Changing it to Python 2.7.11 fixed the problem.


回答 9

当我使用32位Windows Installer在系统上卸载并重新安装其他版本的2.7.x Python时,遇到了相同的问题。我的大多数导入语句都出现相同的错误。我卸载了新安装的Python,然后下载了64位Windows安装程序,然后再次重新安装了Python,它可以正常工作。希望对您有帮助。

I faced the same issue when I uninstalled and reinstalled a different version of 2.7.x of Python on my system using a 32 bit Windows Installer. I got the same error on most of my import statements. I uninstalled the newly installed Python and downloaded a 64 bit Windows installer and reinstalled Python again and it worked. Hope this helps you.


回答 10

所以我在 Windows 下安装 vtk 时遇到了问题(由于我使用的是 python 3.7,目前还没有对应的二进制文件,只有较旧的 python 版本才有,因此 pip install vtk 无法正常工作)。

我在 cmd 中运行了 python:

Python 3.7.3 on win32

所以我现在知道我的python 3.7.3在32位上运行。

然后,我下载了正确的 wheel 文件:VTK-8.2.0-cp37-cp37m-win32.whl

接下来,我安装了该轮子:

pip install VTK-8.2.0-cp37-cp37m-win32.whl

然后,我对其进行了测试并成功运行:

python
import vtk

So I had problems installing vtk under Windows (as I use Python 3.7, there is no binary available so far, only for older Python versions, so pip install vtk is not working).

I ran python in my cmd:

Python 3.7.3 on win32

So I now know I have Python 3.7.3 running as 32 bit.

I then downloaded the correct wheel, VTK-8.2.0-cp37-cp37m-win32.whl

Next I installed that wheel:

pip install VTK-8.2.0-cp37-cp37m-win32.whl

Then I tested it and it worked:

python
import vtk

回答 11

更新numpy。

pip install numpy --upgrade

为我工作!

Update numpy.

pip install numpy --upgrade

Works for me!!


回答 12

首先,我把 cv2.pyd 从 /opencv/build/python/2.7/x86 复制到 C:/Python27/Lib/site-packages。出现的错误是

“ RuntimeError:模块是根据API版本9编译的,但此版本的numpy是7”

然后我安装了 numpy-1.8.0-win32-superpack-python2.7.exe,之后 opencv 就可以正常工作了。

>>> import cv2
>>> print cv2.__version__
2.4.13

First I copied cv2.pyd from /opencv/build/python/2.7/x86 to C:/Python27/Lib/site-packages. The error was

“RuntimeError: module compiled against API version 9 but this version of numpy is 7”

Then I installed numpy-1.8.0-win32-superpack-python2.7.exe and opencv works fine.

>>> import cv2
>>> print cv2.__version__
2.4.13

回答 13

您可以从官方非官方站点安装 opencv。

如果您使用 Anaconda,请参考这个问题和这个 issue。

You can install opencv from official or unofficial sites.

Refer to this question and this issue if you are using Anaconda.


回答 14

  1. 请确保您已安装python 2.7.12或更低版本,否则您肯定会收到此错误。
  2. 如果操作系统为64位,请确保已安装64位Oracle客户端。
  3. 确保用于 Python 2.7 的 Microsoft Visual C++ 编译器在 64 位操作系统上为 64 位,在 32 位操作系统上为 32 位。注意:如果您的操作系统是 64 位,则安装 64 位的软件包;如果是 32 位,则安装 32 位的软件包。
  1. Please make sure that you have installed python 2.7.12 or below version otherwise you will get this error definitely.
  2. Make sure Oracle client is 64 bit installed if OS is 64 Bit.
  3. Make sure Microsoft Visual C++ Compiler for Python 2.7 is 64-bit for a 64-bit OS or 32-bit for a 32-bit OS. Note: if your OS is 64-bit, install 64-bit packages; if your OS is 32-bit, install 32-bit packages.

回答 15

这个问题有一个非常简单的解决方案。安装 opencv 之后,

把 cv2.pyd 从 C:\opencv\build\python\2.7\ **x64** 放到 C:\Python27\Lib\site-packages,

而不是把 cv2.pyd 从 C:\opencv\build\python\2.7\ **x86** 放到 C:\Python27\Lib\site-packages。

It has a very simple solution. After installing opencv place

cv2.pyd from C:\opencv\build\python\2.7\ **x64** to C:\Python27\Lib\site-packages

instead of, place cv2.pyd from C:\opencv\build\python\2.7\ **x86** to C:\Python27\Lib\site-packages


回答 16

尝试导入MySQLdb时出现此错误。

对我有用的是卸载Python,然后重新安装。

安装npm(https://www.npmjs.com/get-npm)后出现错误。它所做的一件事是即使我已经安装了Python,也要安装它。

I got this error when trying to import MySQLdb.

What worked for me was to uninstall Python and then reinstall it.

I got the error after installing npm (https://www.npmjs.com/get-npm). One thing it did was install Python even though I already had it.


回答 17

这对我有用。我尝试了不同的方法,但这是我最好的解决方案。

打开命令提示符并键入以下内容; pip install opencv-python。(确保您的互联网已打开)。之后,请尝试再次导入。

This has worked for me. I have tried different methods but this was my best solution.

Open a command prompt and type the following: pip install opencv-python (make sure your internet is on). After that, try importing it again.


回答 18

这个方法对我有效

pip install -- pywin32==227

This one worked for me

pip install -- pywin32==227

回答 19

我找到了解决方案:也许您可以尝试使用 cmd 窗口而不是 anaconda 提示窗口来启动您的第一个 scrapy 测试。

I found the solution, maybe you can try to use the cmd window rather than the anaconda prompt window to start your first scrapy test.


cv2.imshow命令在opencv-python中无法正常工作

问题:cv2.imshow命令在opencv-python中无法正常工作

我正在使用 opencv 2.4.2、python 2.7。下面的简单代码创建了一个名称正确的窗口,但其内容是空白的,并不显示图像:

import cv2
img=cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('ImageWindow',img)

有谁知道这个问题吗?

I’m using opencv 2.4.2, python 2.7. The following simple code creates a window with the correct name, but its content is just blank and doesn’t show the image:

import cv2
img=cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('ImageWindow',img)

Does anyone know about this issue?


回答 0

imshow() 只有配合 waitKey() 才能正常工作:

import cv2
img = cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('ImageWindow', img)
cv2.waitKey()

(更新窗口所需的整个消息循环都隐藏在其中。)

imshow() only works with waitKey():

import cv2
img = cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('ImageWindow', img)
cv2.waitKey()

(The whole message-loop necessary for updating the window is hidden in there.)
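
As a small extension of the same idea (a sketch reusing the path from the question), you can keep pumping the message loop until a specific key is pressed and then close the window explicitly:

import cv2

img = cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('ImageWindow', img)
while True:
    # waitKey pumps the GUI message loop; masking with 0xFF gives the key code
    if cv2.waitKey(100) & 0xFF == ord('q'):   # press 'q' to close
        break
cv2.destroyAllWindows()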


回答 1

我在这里找到了对我有效的答案:http://txt.arboreus.com/2012/07/11/highgui-opencv-window-from-ipython.html

如果您运行交互式ipython会话,并希望使用highgui窗口,请首先执行cv2.startWindowThread()。

详细而言:HighGUI是简化的界面,用于显示来自OpenCV代码的图像和视频。它应该像这样简单:

import cv2
img = cv2.imread("image.jpg")
cv2.startWindowThread()
cv2.namedWindow("preview")
cv2.imshow("preview", img)

I found the answer that worked for me here: http://txt.arboreus.com/2012/07/11/highgui-opencv-window-from-ipython.html

If you run an interactive ipython session, and want to use highgui windows, do cv2.startWindowThread() first.

In detail: HighGUI is a simplified interface to display images and video from OpenCV code. It should be as easy as:

import cv2
img = cv2.imread("image.jpg")
cv2.startWindowThread()
cv2.namedWindow("preview")
cv2.imshow("preview", img)

回答 2

您必须在 cv2.imshow("window",img) 之后使用 cv2.waitKey(0)。只有这样,它才能起作用。

import cv2
img=cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('Window',img)
cv2.waitKey(0)

You must use cv2.waitKey(0) after cv2.imshow("window",img). Only then will it work.

import cv2
img=cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('Window',img)
cv2.waitKey(0)

回答 3

如果您在Python控制台中运行,请执行以下操作:

img = cv2.imread("yourimage.jpg")

cv2.imshow("img", img); cv2.waitKey(0); cv2.destroyAllWindows()

然后,如果您在图像窗口上按 Enter,它将成功关闭图像,您可以继续运行其他命令。

If you are running inside a Python console, do this:

img = cv2.imread("yourimage.jpg")

cv2.imshow("img", img); cv2.waitKey(0); cv2.destroyAllWindows()

Then if you press Enter on the image, it will successfully close the image and you can proceed running other commands.


回答 4

我遇到了同样的问题。我试图在 IDLE 中读取图像,并尝试用 cv2.imshow() 显示它,但是显示窗口冻结,并且在尝试关闭窗口时提示 pythonw.exe 无响应。

下面的帖子给出了可能发生这种情况的可能解释

pythonw.exe没有响应

基本上,不要在 IDLE 中执行此操作。把代码写成脚本,然后从 shell 运行它;如果是在 Windows 下,也可以用 .pyw 扩展名命名并双击直接运行。显然,IDLE 自己的事件循环与 GUI 工具包的事件循环之间存在冲突。

当我imshow()在脚本中使用并执行它,而不是直接在IDLE上运行它时,它就起作用了。

I faced the same issue. I tried to read an image from IDLE and tried to display it using cv2.imshow(), but the display window freezes and shows pythonw.exe is not responding when trying to close the window.

The post below gives a possible explanation for why this is happening

pythonw.exe is not responding

Basically, don’t do this from IDLE. Write a script and run it from the shell or the script directly if in windows, by naming it with a .pyw extension and double clicking it. There is apparently a conflict between IDLE’s own event loop and the ones from GUI toolkits.

When I used imshow() in a script and execute it rather than running it directly over IDLE, it worked.


回答 5

最后添加cv2.waitKey(0)

add cv2.waitKey(0) in the end.


回答 6

对我来说,数字大于0的waitKey()有效

    cv2.waitKey(1)

For me waitKey() with number greater than 0 worked

    cv2.waitKey(1)

回答 7

您在此线程中的某处拥有了所有必要的部分:

if cv2.waitKey(): cv2.destroyAllWindows()

在IDLE中对我来说效果很好。

You’ve got all the necessary pieces somewhere in this thread:

if cv2.waitKey(): cv2.destroyAllWindows()

works fine for me in IDLE.


回答 8

如果您没有使它起作用,那么最好放

import cv2
img=cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('Window',img)
cv2.waitKey(0)

成一个文件并运行它。

If you have not made this work, you had better put

import cv2
img=cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('Window',img)
cv2.waitKey(0)

into one file and run it.


回答 9

在 waitKey(0) 之后不需要任何其他方法(针对上面代码的回复)

import cv2
img=cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('ImageWindow',img)
cv2.waitKey(0)

窗口出现后 -> 单击窗口,然后按 Enter。窗口将关闭。

Doesn’t need any additional methods after waitKey(0) (reply for above code)

import cv2
img=cv2.imread('C:/Python27/03323_HD.jpg')
cv2.imshow('ImageWindow',img)
cv2.waitKey(0)

Window appears -> Click on the window & press Enter. The window will close.


回答 10

如果选择使用“ cv2.waitKey(0)”,请确保已编写“ cv2.waitKey(0)”而不是“ cv2.waitkey(0)”,因为小写的“ k”也可能冻结程序。

If you choose to use “cv2.waitKey(0)”, be sure that you have written “cv2.waitKey(0)” instead of “cv2.waitkey(0)”, because that lowercase “k” might freeze your program too.


回答 11

我也遇到-215错误。我以为imshow是问题所在,但是当我将imread更改为读取不存在的文件时,那里没有错误。因此,我将图像文件放在工作文件夹中,并添加了cv2.waitKey(0)并成功运行。

I also had a -215 error. I thought imshow was the issue, but when I changed imread to read in a non-existent file I got no error there. So I put the image file in the working folder and added cv2.waitKey(0) and it worked.


回答 12

错误:函数imshow中的(-215)size.width> 0 && size.height> 0

因为找不到图像,所以产生此错误。因此,这不是imshow函数的错误。

error: (-215) size.width>0 && size.height>0 in function imshow

This error is produced because the image is not found. So it’s not an error of imshow function.


回答 13

我遇到了相同的 215 错误,可以通过提供图像的完整路径来解决,例如 C:\Folder1\Folder2\filename.ext

I had the same 215 error, which I was able to overcome by giving the full path to the image, as in, C:\Folder1\Folder2\filename.ext


Python OpenCV2(cv2)包装器获取图像大小?

问题:Python OpenCV2(cv2)包装器获取图像大小?

如何在 Python OpenCV(numpy)的 cv2 包装器中获取图像的大小?除了 numpy.shape() 之外,还有其他正确的方法吗?如何获得 (宽度, 高度) 这种格式的尺寸?

How to get the size of an image in the cv2 wrapper in Python OpenCV (numpy)? Is there a correct way to do that other than numpy.shape()? How can I get it in this format: a (width, height) list?


回答 0

cv2 使用 numpy 来处理图像,因此获取图像大小的正确且最佳方法是使用 numpy.shape。假设您正在使用 BGR 图像,下面是一个示例:

>>> import numpy as np
>>> import cv2
>>> img = cv2.imread('foo.jpg')
>>> height, width, channels = img.shape
>>> print height, width, channels
  600 800 3

如果您使用的是二值(单通道)图像,img 将只有两个维度,因此必须将代码更改为:height, width = img.shape

cv2 uses numpy for manipulating images, so the proper and best way to get the size of an image is using numpy.shape. Assuming you are working with BGR images, here is an example:

>>> import numpy as np
>>> import cv2
>>> img = cv2.imread('foo.jpg')
>>> height, width, channels = img.shape
>>> print height, width, channels
  600 800 3

In case you were working with binary images, img will have two dimensions, and therefore you must change the code to: height, width = img.shape


回答 1

恐怕没有“更好”的方法来获取这个大小,不过也没有那么麻烦。

当然,您的代码对于二值/单通道图像以及多通道图像都应该是安全的,但图像的主要维度在 numpy 数组的 shape 中总是排在前面。如果您更看重可读性,或者不想每次都敲这段代码,可以把它包装成一个函数,起一个您喜欢的名字,例如 cv_size:

import numpy as np
import cv2

# ...

def cv_size(img):
    return tuple(img.shape[1::-1])

如果您在终端机/ ipython上,还可以使用lambda表示它:

>>> cv_size = lambda img: tuple(img.shape[1::-1])
>>> cv_size(img)
(640, 480)

交互式工作时,用 def 编写函数并不方便。

编辑

本来我以为用 [:2] 就可以了,但是 numpy 的形状是 (height, width[, depth]),而我们需要的是 (width, height),正如 cv2.resize 所期望的那样,因此我们必须使用 [1::-1]。这比 [:2] 还要难记。话说回来,又有谁记得反向切片呢?

I’m afraid there is no “better” way to get this size, however it’s not that much pain.

Of course your code should be safe for both binary/mono images as well as multi-channel ones, but the principal dimensions of the image always come first in the numpy array’s shape. If you opt for readability, or don’t want to bother typing this, you can wrap it up in a function, and give it a name you like, e.g. cv_size:

import numpy as np
import cv2

# ...

def cv_size(img):
    return tuple(img.shape[1::-1])

If you’re on a terminal / ipython, you can also express it with a lambda:

>>> cv_size = lambda img: tuple(img.shape[1::-1])
>>> cv_size(img)
(640, 480)

Writing functions with def is not fun while working interactively.

Edit

Originally I thought that using [:2] was OK, but the numpy shape is (height, width[, depth]), and we need (width, height), as e.g. cv2.resize expects, so – we must use [1::-1]. Even less memorable than [:2]. And who remembers reverse slicing anyway?
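
A variant of cv_size that also covers grayscale/binary images might look like this (just a sketch; it relies on shape[:2] always being (height, width)):

def cv_size(img):
    """Return (width, height) for both grayscale and color images."""
    h, w = img.shape[:2]
    return (w, h)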


ImportError:libSM.so.6:无法打开共享库文件:没有这样的文件或目录

问题:ImportError:libSM.so.6:无法打开共享库文件:没有这样的文件或目录

尝试导入OpenCV时,使用import cv2我得到以下错误:

/usr/local/lib/python2.7/dist-packages/cv2/__init__.py in <module>()
      7 
      8 # make IDE's (PyCharm) autocompletion happy
----> 9 from .cv2 import *
     10 
     11 # wildcard import above does not import "private" variables like __version__

ImportError: libSM.so.6: cannot open shared object file: No such file or directory

不确定如何解决,我正在尝试使用 Google 新的 Colaboratory 工具。笔记本在这里:https://drive.google.com/file/d/0B7-sJqBiyjCcRmFkMzl6cy1iN0k/view?usp=sharing

When trying to import OpenCV, using import cv2 I get the following error:

/usr/local/lib/python2.7/dist-packages/cv2/__init__.py in <module>()
      7 
      8 # make IDE's (PyCharm) autocompletion happy
----> 9 from .cv2 import *
     10 
     11 # wildcard import above does not import "private" variables like __version__

ImportError: libSM.so.6: cannot open shared object file: No such file or directory

Not sure how to fix this – trying to play around with Google’s new Colaboratory tool. Notebook is here: https://drive.google.com/file/d/0B7-sJqBiyjCcRmFkMzl6cy1iN0k/view?usp=sharing


回答 0

通过将其作为脚本的前两行来解决此问题:

!pip install opencv-python
!apt update && apt install -y libsm6 libxext6
!apt-get install -y libxrender-dev

This fixed the problem by having it as the first two lines of the script:

!pip install opencv-python
!apt update && apt install -y libsm6 libxext6
!apt-get install -y libxrender-dev

回答 1

您需要添加sudo。我做了以下事情来安装它:

sudo apt-get install libsm6 libxrender1 libfontconfig1

然后这样做(可选!也许您将不需要它)

sudo python3 -m pip install opencv-contrib-python

终于完成了!

You need to add sudo . I did the following to get it installed :

sudo apt-get install libsm6 libxrender1 libfontconfig1

and then did that (optional! maybe you won’t need it)

sudo python3 -m pip install opencv-contrib-python

FINALLY got it done !


回答 2

对于CentOS,运行以下命令: sudo yum install libXext libSM libXrender

For CentOS, run this: sudo yum install libXext libSM libXrender


回答 3

现在 opencv-python 有一个无头(headless)版本,它去掉了图形依赖(如 libSM)。您可以在发布页面上看到普通版/无头版(以及促成该版本的 GitHub issue);只需在安装时加上 -headless,例如

pip install opencv-python-headless
# also contrib, if needed
pip install opencv-contrib-python-headless

There is now a headless version of opencv-python which removes the graphical dependencies (like libSM). You can see the normal / headless version on the releases page (and the GitHub issue leading to this); just add -headless when installing, e.g.,

pip install opencv-python-headless
# also contrib, if needed
pip install opencv-contrib-python-headless

回答 4

问题可能出在您的 python-opencv 版本上。最好将版本降级到 3.3.0.9,它不包含任何 GUI 依赖。GitHub 上也有同样的问题,这里是答案的链接。

May be the problem is with your python-opencv version. It’s better to downgrade your version to 3.3.0.9 which does not include any GUI dependencies. Same question was found on GitHub here the link to the answer.


回答 5

我在 python:3.7-slim docker 镜像上遇到了 openCV 的类似问题。以下做法对我有用:

apt-get install build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev

请查看是否有帮助!

I was facing similar issue with openCV on the python:3.7-slim docker box. Following did the trick for me :

apt-get install build-essential libglib2.0-0 libsm6 libxext6 libxrender-dev

Please see if this helps !


回答 6

我无法在Google Cloud Platform的Ubuntu上运行的Anaconda-Jupyter笔记本上安装cv2。但是我找到了一种方法,如下所示:

从ssh终端运行以下命令,并按照说明进行操作:

 sudo apt-get install libsm6 libxrender1 libfontconfig1

安装完毕后,打开Jupyter笔记本并运行以下命令:

!pip install opencv-contrib-python

注意:我尝试运行此命令:“ sudo python3 -m pip install opencv-contrib-python”,但显示错误。但是以上命令对我有用。

现在刷新笔记本页面,并通过import cv2在笔记本中运行检查它是否已安装。

I was not able to install cv2 on Anaconda-Jupyter notebook running on Ubuntu on Google Cloud Platform. But I found a way to do it as follows:

Run the following command from the ssh terminal and follow the instruction:

 sudo apt-get install libsm6 libxrender1 libfontconfig1

Once its installed Open the Jupyter notebook and run following command:

!pip install opencv-contrib-python

Note: I tried to run this command: “sudo python3 -m pip install opencv-contrib-python”but it was showing an error. But above command worked for me.

Now refresh the notebook page and check whether it’s installed or not by running import cv2 in the notebook.


回答 7

我在 docker 中遇到了同样的问题,这些步骤对我有用:

apt update

然后:

apt install libsm6 libxext6 libxrender-dev

I had the same problem in docker and these steps worked for me:

apt update

then:

apt install libsm6 libxext6 libxrender-dev

将图像从PIL转换为openCV格式

问题:将图像从PIL转换为openCV格式

我正在尝试将图像从 PIL 格式转换为 OpenCV 格式。我使用的是 OpenCV 2.4.3。以下是我目前为止的尝试。

>>> from PIL import Image
>>> import cv2 as cv
>>> pimg = Image.open('D:\\traffic.jpg')                           #PIL Image
>>> cimg = cv.cv.CreateImageHeader(pimg.size,cv.IPL_DEPTH_8U,3)    #CV Image
>>> cv.cv.SetData(cimg,pimg.tostring())
>>> cv.cv.NamedWindow('cimg')
>>> cv.cv.ShowImage('cimg',cimg)
>>> cv.cv.WaitKey()

但我认为图像并没有被转换成 CV 格式。窗口显示给我的是一大片棕色的图像。将图像从 PIL 转换为 CV 格式时,我哪里做错了?

另外,为什么我需要输入 cv.cv 才能访问这些函数?

I’m trying to convert image from PIL to OpenCV format. I’m using OpenCV 2.4.3. here is what I’ve attempted till now.

>>> from PIL import Image
>>> import cv2 as cv
>>> pimg = Image.open('D:\\traffic.jpg')                           #PIL Image
>>> cimg = cv.cv.CreateImageHeader(pimg.size,cv.IPL_DEPTH_8U,3)    #CV Image
>>> cv.cv.SetData(cimg,pimg.tostring())
>>> cv.cv.NamedWindow('cimg')
>>> cv.cv.ShowImage('cimg',cimg)
>>> cv.cv.WaitKey()

But I think the image is not getting converted to CV format. The Window shows me a large brown image. Where am I going wrong in Converting image from PIL to CV format?

Also, why do I need to type cv.cv to access functions?


回答 0

用这个:

pil_image = PIL.Image.open('Image.jpg').convert('RGB') 
open_cv_image = numpy.array(pil_image) 
# Convert RGB to BGR 
open_cv_image = open_cv_image[:, :, ::-1].copy() 

use this:

pil_image = PIL.Image.open('Image.jpg').convert('RGB') 
open_cv_image = numpy.array(pil_image) 
# Convert RGB to BGR 
open_cv_image = open_cv_image[:, :, ::-1].copy() 
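
Going the other way (an OpenCV BGR array back to a PIL image) is the mirror operation; a short sketch, assuming the same 'Image.jpg':

import cv2
from PIL import Image

open_cv_image = cv2.imread('Image.jpg')               # BGR numpy array
rgb = cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2RGB)  # BGR -> RGB
pil_image = Image.fromarray(rgb)                      # back to a PIL image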

回答 1

这是我能找到的最短版本,省去/隐藏了一次额外的转换:

pil_image = PIL.Image.open('image.jpg')
opencvImage = cv2.cvtColor(numpy.array(pil_image), cv2.COLOR_RGB2BGR)

如果从URL读取文件:

import cStringIO
import urllib
file = cStringIO.StringIO(urllib.urlopen(r'http://stackoverflow.com/a_nice_image.jpg').read())
pil_image = PIL.Image.open(file)
opencvImage = cv2.cvtColor(numpy.array(pil_image), cv2.COLOR_RGB2BGR)

This is the shortest version I could find, saving/hiding an extra conversion:

pil_image = PIL.Image.open('image.jpg')
opencvImage = cv2.cvtColor(numpy.array(pil_image), cv2.COLOR_RGB2BGR)

If reading a file from a URL:

import cStringIO
import urllib
file = cStringIO.StringIO(urllib.urlopen(r'http://stackoverflow.com/a_nice_image.jpg').read())
pil_image = PIL.Image.open(file)
opencvImage = cv2.cvtColor(numpy.array(pil_image), cv2.COLOR_RGB2BGR)

如何使用OpenCV2.0和Python2.6调整图像大小

问题:如何使用OpenCV2.0和Python2.6调整图像大小

我想使用OpenCV2.0和Python2.6显示调整大小的图像。我在http://opencv.willowgarage.com/documentation/python/cookbook.html上使用并采用了该示例,但是不幸的是,该代码是针对OpenCV2.1的,并且似乎不适用于2.0。这是我的代码:

import os, glob
import cv

ulpath = "exampleshq/"

for infile in glob.glob( os.path.join(ulpath, "*.jpg") ):
    im = cv.LoadImage(infile)
    thumbnail = cv.CreateMat(im.rows/10, im.cols/10, cv.CV_8UC3)
    cv.Resize(im, thumbnail)
    cv.NamedWindow(infile)
    cv.ShowImage(infile, thumbnail)
    cv.WaitKey(0)
    cv.DestroyWindow(name)

由于我不能使用

cv.LoadImageM

我用了

cv.LoadImage

来代替,这在其他应用程序中没有出过问题。但是,cv.iplimage 没有 rows、cols 或 size 属性。谁能给我一个提示,如何解决这个问题?谢谢。

I want to use OpenCV2.0 and Python2.6 to show resized images. I used and adopted this example but unfortunately, this code is for OpenCV2.1 and does not seem to be working on 2.0. Here my code:

import os, glob
import cv

ulpath = "exampleshq/"

for infile in glob.glob( os.path.join(ulpath, "*.jpg") ):
    im = cv.LoadImage(infile)
    thumbnail = cv.CreateMat(im.rows/10, im.cols/10, cv.CV_8UC3)
    cv.Resize(im, thumbnail)
    cv.NamedWindow(infile)
    cv.ShowImage(infile, thumbnail)
    cv.WaitKey(0)
    cv.DestroyWindow(name)

Since I cannot use

cv.LoadImageM

I used

cv.LoadImage

instead, which was no problem in other applications. Nevertheless, cv.iplimage has no attribute rows, cols or size. Can anyone give me a hint, how to solve this problem?


回答 0

如果要使用 CV2,则需要使用 resize 函数。

例如,这会将两个轴的大小调整一半:

small = cv2.resize(image, (0,0), fx=0.5, fy=0.5) 

并将图像调整为100列(宽度)和50行(高度):

resized_image = cv2.resize(image, (100, 50)) 

另一种选择是使用scipy模块,方法是:

small = scipy.misc.imresize(image, 0.5)

显然,您可以在这些函数的文档中阅读更多选项(cv2.resizescipy.misc.imresize)。


更新:
根据SciPy文档

imresize 在 SciPy 1.0.0 中已被弃用,并将在 1.2.0 中被删除。
使用skimage.transform.resize代替。

请注意,如果您要按比例因子缩放,则可能真正需要的是 skimage.transform.rescale。

If you wish to use CV2, you need to use the resize function.

For example, this will resize both axes by half:

small = cv2.resize(image, (0,0), fx=0.5, fy=0.5) 

and this will resize the image to have 100 cols (width) and 50 rows (height):

resized_image = cv2.resize(image, (100, 50)) 

Another option is to use scipy module, by using:

small = scipy.misc.imresize(image, 0.5)

There are obviously more options you can read in the documentation of those functions (cv2.resize, scipy.misc.imresize).


Update:
According to the SciPy documentation:

imresize is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use skimage.transform.resize instead.

Note that if you’re looking to resize by a factor, you may actually want skimage.transform.rescale.
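
For completeness, a rough scikit-image equivalent of the deprecated call might look like this (a sketch; note that skimage takes the output shape in (rows, cols) order and returns a float image in [0, 1]):

import cv2
from skimage.transform import resize

image = cv2.imread('image.jpg')            # hypothetical input file
resized_image = resize(image, (50, 100))   # 50 rows (height) x 100 columns (width)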


回答 1

示例将图像尺寸加倍

调整图像大小有两种方法。可以指定新的大小:

  1. 手动

    height, width = src.shape[:2]

    dst = cv2.resize(src, (2*width, 2*height), interpolation = cv2.INTER_CUBIC)

  2. 通过比例因子。

    dst = cv2.resize(src, None, fx = 2, fy = 2, interpolation = cv2.INTER_CUBIC),其中fx是沿水平轴的缩放比例,fy是沿垂直轴的缩放比例。

要缩小图像,通常使用 INTER_AREA 插值效果最佳;而要放大图像,通常使用 INTER_CUBIC(速度慢)或 INTER_LINEAR(速度更快,效果也还不错)效果最好。

示例缩小图像以适合最大高度/宽度(保持宽高比)

import cv2

img = cv2.imread('YOUR_PATH_TO_IMG')

height, width = img.shape[:2]
max_height = 300
max_width = 300

# only shrink if img is bigger than required
if max_height < height or max_width < width:
    # get scaling factor
    scaling_factor = max_height / float(height)
    if max_width/float(width) < scaling_factor:
        scaling_factor = max_width / float(width)
    # resize image
    img = cv2.resize(img, None, fx=scaling_factor, fy=scaling_factor, interpolation=cv2.INTER_AREA)

cv2.imshow("Shrinked image", img)
key = cv2.waitKey()

在cv2中使用代码

import cv2 as cv

im = cv.imread(path)

height, width = im.shape[:2]

thumbnail = cv.resize(im, (round(width / 10), round(height / 10)), interpolation=cv.INTER_AREA)

cv.imshow('exampleshq', thumbnail)
cv.waitKey(0)
cv.destroyAllWindows()

Example doubling the image size

There are two ways to resize an image. The new size can be specified:

  1. Manually;

    height, width = src.shape[:2]

    dst = cv2.resize(src, (2*width, 2*height), interpolation = cv2.INTER_CUBIC)

  2. By a scaling factor.

    dst = cv2.resize(src, None, fx = 2, fy = 2, interpolation = cv2.INTER_CUBIC), where fx is the scaling factor along the horizontal axis and fy along the vertical axis.

To shrink an image, it will generally look best with INTER_AREA interpolation, whereas to enlarge an image, it will generally look best with INTER_CUBIC (slow) or INTER_LINEAR (faster but still looks OK).

Example shrink image to fit a max height/width (keeping aspect ratio)

import cv2

img = cv2.imread('YOUR_PATH_TO_IMG')

height, width = img.shape[:2]
max_height = 300
max_width = 300

# only shrink if img is bigger than required
if max_height < height or max_width < width:
    # get scaling factor
    scaling_factor = max_height / float(height)
    if max_width/float(width) < scaling_factor:
        scaling_factor = max_width / float(width)
    # resize image
    img = cv2.resize(img, None, fx=scaling_factor, fy=scaling_factor, interpolation=cv2.INTER_AREA)

cv2.imshow("Shrinked image", img)
key = cv2.waitKey()

Using your code with cv2

import cv2 as cv

im = cv.imread(path)

height, width = im.shape[:2]

thumbnail = cv.resize(im, (round(width / 10), round(height / 10)), interpolation=cv.INTER_AREA)

cv.imshow('exampleshq', thumbnail)
cv.waitKey(0)
cv.destroyAllWindows()

回答 2

您可以使用GetSize函数获取这些信息,cv.GetSize(im)将返回一个具有图像宽度和高度的元组。您还可以使用im.depth和img.nChan获得更多信息。

为了调整图像的大小,我将使用略有不同的过程,使用另一个图像而不是矩阵。最好尝试使用相同类型的数据:

size = cv.GetSize(im)
thumbnail = cv.CreateImage( ( size[0] / 10, size[1] / 10), im.depth, im.nChannels)
cv.Resize(im, thumbnail)

希望这可以帮助 ;)

朱利安

You could use the GetSize function to get those information, cv.GetSize(im) would return a tuple with the width and height of the image. You can also use im.depth and img.nChan to get some more information.

And to resize an image, I would use a slightly different process, with another image instead of a matrix. It is better to try to work with the same type of data:

size = cv.GetSize(im)
thumbnail = cv.CreateImage( ( size[0] / 10, size[1] / 10), im.depth, im.nChannels)
cv.Resize(im, thumbnail)

Hope this helps ;)

Julien


回答 3

def rescale_by_height(image, target_height, method=cv2.INTER_LANCZOS4):
    """Rescale `image` to `target_height` (preserving aspect ratio)."""
    w = int(round(target_height * image.shape[1] / image.shape[0]))
    return cv2.resize(image, (w, target_height), interpolation=method)

def rescale_by_width(image, target_width, method=cv2.INTER_LANCZOS4):
    """Rescale `image` to `target_width` (preserving aspect ratio)."""
    h = int(round(target_width * image.shape[0] / image.shape[1]))
    return cv2.resize(image, (target_width, h), interpolation=method)
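
A possible usage of the two helpers above (a sketch; the file names are hypothetical and cv2 plus the functions are assumed to be defined as shown):

import cv2

image = cv2.imread('input.jpg')
tall = rescale_by_height(image, 200)   # 200 px tall, width follows the aspect ratio
wide = rescale_by_width(image, 640)    # 640 px wide, height follows the aspect ratio
cv2.imwrite('tall.jpg', tall)
cv2.imwrite('wide.jpg', wide)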

回答 4

这是一个在保持宽高比的同时,按所需宽度或高度对图像进行放大或缩小的函数

# Resizes a image and maintains aspect ratio
def maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # Grab the image size and initialize dimensions
    dim = None
    (h, w) = image.shape[:2]

    # Return original image if no need to resize
    if width is None and height is None:
        return image

    # We are resizing height if width is none
    if width is None:
        # Calculate the ratio of the height and construct the dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # We are resizing width if height is none
    else:
        # Calculate the ratio of the width and construct the dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # Return the resized image
    return cv2.resize(image, dim, interpolation=inter)

用法

import cv2

image = cv2.imread('1.png')
cv2.imshow('width_100', maintain_aspect_ratio_resize(image, width=100))
cv2.imshow('width_300', maintain_aspect_ratio_resize(image, width=300))
cv2.waitKey()

使用此示例图片

只需缩小到width=100(左)或放大到width=300(右)

Here’s a function to upscale or downscale an image by desired width or height while maintaining aspect ratio

# Resizes a image and maintains aspect ratio
def maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    # Grab the image size and initialize dimensions
    dim = None
    (h, w) = image.shape[:2]

    # Return original image if no need to resize
    if width is None and height is None:
        return image

    # We are resizing height if width is none
    if width is None:
        # Calculate the ratio of the height and construct the dimensions
        r = height / float(h)
        dim = (int(w * r), height)
    # We are resizing width if height is none
    else:
        # Calculate the ratio of the width and construct the dimensions
        r = width / float(w)
        dim = (width, int(h * r))

    # Return the resized image
    return cv2.resize(image, dim, interpolation=inter)

Usage

import cv2

image = cv2.imread('1.png')
cv2.imshow('width_100', maintain_aspect_ratio_resize(image, width=100))
cv2.imshow('width_300', maintain_aspect_ratio_resize(image, width=300))
cv2.waitKey()

Using this example image

Simply downscale to width=100 (left) or upscale to width=300 (right)


如何消除数独方块中的凸度缺陷?

问题:如何消除数独方块中的凸度缺陷?

我当时在做一个有趣的项目:使用 OpenCV 从输入图像中求解数独(就像 Google Goggles 等应用那样)。我已经完成了任务,但在最后遇到了一个小问题,因此来到这里。

我使用OpenCV 2.3.1的Python API进行了编程。

以下是我所做的:

  1. 读取图像
  2. 找到轮廓
  3. 选择面积最大的那个(也有些等同于正方形的那个)。
  4. 找到拐角点。

    例如下面给出:

    (请注意,绿线正确地与数独的真实边界重合,因此数独可以被正确地变形。请看下一张图片)

  5. 使图像变形为完美的正方形

    例如图片:

  6. 执行OCR(为此我使用了我在OpenCV-Python的简单数字识别OCR中给出的方法)

而且该方法效果很好。

问题:

请看这张图片。

在此图像上执行步骤4会得到以下结果:

画出的红线是原始轮廓,是数独边界的真实轮廓。

画出的绿线是近似轮廓,它将是变形图像的轮廓。

数独顶部的绿线和红线之间当然有区别。因此,在扭曲时,我并没有获得数独的原始边界。

我的问题 :

如何在数独的正确边界(即红线)上扭曲图像,或者如何消除红线和绿线之间的差异?OpenCV中有什么方法吗?

I was doing a fun project: Solving a Sudoku from an input image using OpenCV (as in Google goggles etc). And I have completed the task, but at the end I found a little problem for which I came here.

I did the programming using Python API of OpenCV 2.3.1.

Below is what I did :

  1. Read the image
  2. Find the contours
  3. Select the one with maximum area, ( and also somewhat equivalent to square).
  4. Find the corner points.

    e.g. given below:

    (Notice here that the green line correctly coincides with the true boundary of the Sudoku, so the Sudoku can be correctly warped. Check next image)

  5. warp the image to a perfect square

    eg image:

  6. Perform OCR ( for which I used the method I have given in Simple Digit Recognition OCR in OpenCV-Python )

And the method worked well.

Problem:

Check out this image.

Performing the step 4 on this image gives the result below:

The red line drawn is the original contour which is the true outline of sudoku boundary.

The green line drawn is approximated contour which will be the outline of warped image.

Which of course, there is difference between green line and red line at the top edge of sudoku. So while warping, I am not getting the original boundary of the Sudoku.

My Question :

How can I warp the image on the correct boundary of the Sudoku, i.e. the red line OR how can I remove the difference between red line and green line? Is there any method for this in OpenCV?


回答 0

我有一个可行的解决方案,但是您必须自己将其转换为OpenCV。它用Mathematica编写。

第一步是通过将每个像素除以关闭操作的结果来调整图像的亮度:

src = ColorConvert[Import["http://davemark.com/images/sudoku.jpg"], "Grayscale"];
white = Closing[src, DiskMatrix[5]];
srcAdjusted = Image[ImageData[src]/ImageData[white]]

下一步是找到数独区域,因此我可以忽略(遮盖)背景。为此,我使用连接组件分析,然后选择凸面面积最大的组件:

components = 
  ComponentMeasurements[
    ColorNegate@Binarize[srcAdjusted], {"ConvexArea", "Mask"}][[All, 
    2]];
largestComponent = Image[SortBy[components, First][[-1, 2]]]

通过填充此图像,我得到了数独网格的蒙版:

mask = FillingTransform[largestComponent]

现在,我可以使用二阶导数滤波器在两个单独的图像中查找垂直线和水平线:

lY = ImageMultiply[MorphologicalBinarize[GaussianFilter[srcAdjusted, 3, {2, 0}], {0.02, 0.05}], mask];
lX = ImageMultiply[MorphologicalBinarize[GaussianFilter[srcAdjusted, 3, {0, 2}], {0.02, 0.05}], mask];

我再次使用连接的分量分析从这些图像中提取网格线。网格线比数字长得多,因此我可以使用卡尺长度来仅选择与网格线相连的组件。按位置对它们进行排序,对于图像中的每个垂直/水平网格线,我得到2×10的蒙版图像:

verticalGridLineMasks = 
  SortBy[ComponentMeasurements[
      lX, {"CaliperLength", "Centroid", "Mask"}, # > 100 &][[All, 
      2]], #[[2, 1]] &][[All, 3]];
horizontalGridLineMasks = 
  SortBy[ComponentMeasurements[
      lY, {"CaliperLength", "Centroid", "Mask"}, # > 100 &][[All, 
      2]], #[[2, 2]] &][[All, 3]];

接下来,我将每对垂直/水平网格线进行放大,将它们放大,计算出像素间的交点,并计算结果的中心。这些点是网格线的交点:

centerOfGravity[l_] := 
 ComponentMeasurements[Image[l], "Centroid"][[1, 2]]
gridCenters = 
  Table[centerOfGravity[
    ImageData[Dilation[Image[h], DiskMatrix[2]]]*
     ImageData[Dilation[Image[v], DiskMatrix[2]]]], {h, 
    horizontalGridLineMasks}, {v, verticalGridLineMasks}];

最后一步是为通过这些点的X / Y映射定义两个插值函数,并使用这些函数变换图像:

fnX = ListInterpolation[gridCenters[[All, All, 1]]];
fnY = ListInterpolation[gridCenters[[All, All, 2]]];
transformed = 
 ImageTransformation[
  srcAdjusted, {fnX @@ Reverse[#], fnY @@ Reverse[#]} &, {9*50, 9*50},
   PlotRange -> {{1, 10}, {1, 10}}, DataRange -> Full]

所有操作都是基本的图像处理功能,因此在OpenCV中也应该可行。基于样条的图像转换可能会更困难,但我认为您并不是真的需要它。可能使用您现在在每个单个单元格上使用的透视变换,将获得足够好的结果。

I have a solution that works, but you’ll have to translate it to OpenCV yourself. It’s written in Mathematica.

The first step is to adjust the brightness in the image, by dividing each pixel with the result of a closing operation:

src = ColorConvert[Import["http://davemark.com/images/sudoku.jpg"], "Grayscale"];
white = Closing[src, DiskMatrix[5]];
srcAdjusted = Image[ImageData[src]/ImageData[white]]

The next step is to find the sudoku area, so I can ignore (mask out) the background. For that, I use connected component analysis, and select the component that’s got the largest convex area:

components = 
  ComponentMeasurements[
    ColorNegate@Binarize[srcAdjusted], {"ConvexArea", "Mask"}][[All, 
    2]];
largestComponent = Image[SortBy[components, First][[-1, 2]]]

By filling this image, I get a mask for the sudoku grid:

mask = FillingTransform[largestComponent]

Now, I can use a 2nd order derivative filter to find the vertical and horizontal lines in two separate images:

lY = ImageMultiply[MorphologicalBinarize[GaussianFilter[srcAdjusted, 3, {2, 0}], {0.02, 0.05}], mask];
lX = ImageMultiply[MorphologicalBinarize[GaussianFilter[srcAdjusted, 3, {0, 2}], {0.02, 0.05}], mask];

I use connected component analysis again to extract the grid lines from these images. The grid lines are much longer than the digits, so I can use caliper length to select only the grid lines-connected components. Sorting them by position, I get 2×10 mask images for each of the vertical/horizontal grid lines in the image:

verticalGridLineMasks = 
  SortBy[ComponentMeasurements[
      lX, {"CaliperLength", "Centroid", "Mask"}, # > 100 &][[All, 
      2]], #[[2, 1]] &][[All, 3]];
horizontalGridLineMasks = 
  SortBy[ComponentMeasurements[
      lY, {"CaliperLength", "Centroid", "Mask"}, # > 100 &][[All, 
      2]], #[[2, 2]] &][[All, 3]];

Next I take each pair of vertical/horizontal grid lines, dilate them, calculate the pixel-by-pixel intersection, and calculate the center of the result. These points are the grid line intersections:

centerOfGravity[l_] := 
 ComponentMeasurements[Image[l], "Centroid"][[1, 2]]
gridCenters = 
  Table[centerOfGravity[
    ImageData[Dilation[Image[h], DiskMatrix[2]]]*
     ImageData[Dilation[Image[v], DiskMatrix[2]]]], {h, 
    horizontalGridLineMasks}, {v, verticalGridLineMasks}];

The last step is to define two interpolation functions for X/Y mapping through these points, and transform the image using these functions:

fnX = ListInterpolation[gridCenters[[All, All, 1]]];
fnY = ListInterpolation[gridCenters[[All, All, 2]]];
transformed = 
 ImageTransformation[
  srcAdjusted, {fnX @@ Reverse[#], fnY @@ Reverse[#]} &, {9*50, 9*50},
   PlotRange -> {{1, 10}, {1, 10}}, DataRange -> Full]

All of the operations are basic image processing function, so this should be possible in OpenCV, too. The spline-based image transformation might be harder, but I don’t think you really need it. Probably using the perspective transformation you use now on each individual cell will give good enough results.


回答 1

Nikie的答案解决了我的问题,但他的答案是在Mathematica中。因此,我认为我应该在这里给出其OpenCV改编版。但是在实施之后,我可以看到OpenCV代码比nikie的mathematica代码大得多。而且,我在OpenCV中找不到nikie完成的插值方法(尽管可以使用scipy完成,但是我会在时间到时告诉它。)

1.图像预处理(关闭操作)

import cv2
import numpy as np

img = cv2.imread('dave.jpg')
img = cv2.GaussianBlur(img,(5,5),0)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
mask = np.zeros((gray.shape),np.uint8)
kernel1 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(11,11))

close = cv2.morphologyEx(gray,cv2.MORPH_CLOSE,kernel1)
div = np.float32(gray)/(close)
res = np.uint8(cv2.normalize(div,div,0,255,cv2.NORM_MINMAX))
res2 = cv2.cvtColor(res,cv2.COLOR_GRAY2BGR)

结果:

2.找到数独广场并创建蒙版图像

thresh = cv2.adaptiveThreshold(res,255,0,1,19,2)
contour,hier = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)

max_area = 0
best_cnt = None
for cnt in contour:
    area = cv2.contourArea(cnt)
    if area > 1000:
        if area > max_area:
            max_area = area
            best_cnt = cnt

cv2.drawContours(mask,[best_cnt],0,255,-1)
cv2.drawContours(mask,[best_cnt],0,0,2)

res = cv2.bitwise_and(res,mask)

结果:

3.查找垂直线

kernelx = cv2.getStructuringElement(cv2.MORPH_RECT,(2,10))

dx = cv2.Sobel(res,cv2.CV_16S,1,0)
dx = cv2.convertScaleAbs(dx)
cv2.normalize(dx,dx,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dx,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernelx,iterations = 1)

contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if h/w > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)
close = cv2.morphologyEx(close,cv2.MORPH_CLOSE,None,iterations = 2)
closex = close.copy()

结果:

4.查找水平线

kernely = cv2.getStructuringElement(cv2.MORPH_RECT,(10,2))
dy = cv2.Sobel(res,cv2.CV_16S,0,2)
dy = cv2.convertScaleAbs(dy)
cv2.normalize(dy,dy,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dy,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernely)

contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if w/h > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)

close = cv2.morphologyEx(close,cv2.MORPH_DILATE,None,iterations = 2)
closey = close.copy()

结果:

当然,这不是很好。

5.查找网格点

res = cv2.bitwise_and(closex,closey)

结果:

6.纠正缺陷

在这里,nikie 做了某种插值,对此我了解不多。而且我在 OpenCV 中找不到对应的函数。(也许有,只是我不知道。)

查看此SOF,它说明了如何使用SciPy进行此操作,我不想使用它:OpenCV中的图像转换

因此,在这里,我取每个子方格的四个角点,并分别对它们应用透视变换(warpPerspective)。

为此,首先我们找到质心。

contour, hier = cv2.findContours(res,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
centroids = []
for cnt in contour:
    mom = cv2.moments(cnt)
    (x,y) = int(mom['m10']/mom['m00']), int(mom['m01']/mom['m00'])
    cv2.circle(img,(x,y),4,(0,255,0),-1)
    centroids.append((x,y))

但是结果质心将不会被排序。查看下图以查看其顺序:

因此,我们从左到右,从上到下对它们进行排序。

centroids = np.array(centroids,dtype = np.float32)
c = centroids.reshape((100,2))
c2 = c[np.argsort(c[:,1])]

b = np.vstack([c2[i*10:(i+1)*10][np.argsort(c2[i*10:(i+1)*10,0])] for i in xrange(10)])
bm = b.reshape((10,10,2))

现在看下面他们的命令:

最后,我们应用转换并创建尺寸为450×450的新图像。

output = np.zeros((450,450,3),np.uint8)
for i,j in enumerate(b):
    ri = i/10
    ci = i%10
    if ci != 9 and ri!=9:
        src = bm[ri:ri+2, ci:ci+2 , :].reshape((4,2))
        dst = np.array( [ [ci*50,ri*50],[(ci+1)*50-1,ri*50],[ci*50,(ri+1)*50-1],[(ci+1)*50-1,(ri+1)*50-1] ], np.float32)
        retval = cv2.getPerspectiveTransform(src,dst)
        warp = cv2.warpPerspective(res2,retval,(450,450))
        output[ri*50:(ri+1)*50-1 , ci*50:(ci+1)*50-1] = warp[ri*50:(ri+1)*50-1 , ci*50:(ci+1)*50-1].copy()

结果:

结果几乎与nikie相同,但是代码长度很大。也许可以使用更好的方法,但是在那之前,这种方法行之有效。

此致,ARK。

Nikie’s answer solved my problem, but his answer was in Mathematica. So I thought I should give its OpenCV adaptation here. But after implementing I could see that OpenCV code is much bigger than nikie’s mathematica code. And also, I couldn’t find interpolation method done by nikie in OpenCV ( although it can be done using scipy, i will tell it when time comes.)

1. Image PreProcessing ( closing operation )

import cv2
import numpy as np

img = cv2.imread('dave.jpg')
img = cv2.GaussianBlur(img,(5,5),0)
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
mask = np.zeros((gray.shape),np.uint8)
kernel1 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(11,11))

close = cv2.morphologyEx(gray,cv2.MORPH_CLOSE,kernel1)
div = np.float32(gray)/(close)
res = np.uint8(cv2.normalize(div,div,0,255,cv2.NORM_MINMAX))
res2 = cv2.cvtColor(res,cv2.COLOR_GRAY2BGR)

Result :

2. Finding Sudoku Square and Creating Mask Image

thresh = cv2.adaptiveThreshold(res,255,0,1,19,2)
contour,hier = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)

max_area = 0
best_cnt = None
for cnt in contour:
    area = cv2.contourArea(cnt)
    if area > 1000:
        if area > max_area:
            max_area = area
            best_cnt = cnt

cv2.drawContours(mask,[best_cnt],0,255,-1)
cv2.drawContours(mask,[best_cnt],0,0,2)

res = cv2.bitwise_and(res,mask)

Result :

3. Finding Vertical lines

kernelx = cv2.getStructuringElement(cv2.MORPH_RECT,(2,10))

dx = cv2.Sobel(res,cv2.CV_16S,1,0)
dx = cv2.convertScaleAbs(dx)
cv2.normalize(dx,dx,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dx,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernelx,iterations = 1)

contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if h/w > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)
close = cv2.morphologyEx(close,cv2.MORPH_CLOSE,None,iterations = 2)
closex = close.copy()

Result :

4. Finding Horizontal Lines

kernely = cv2.getStructuringElement(cv2.MORPH_RECT,(10,2))
dy = cv2.Sobel(res,cv2.CV_16S,0,2)
dy = cv2.convertScaleAbs(dy)
cv2.normalize(dy,dy,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dy,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernely)

contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if w/h > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)

close = cv2.morphologyEx(close,cv2.MORPH_DILATE,None,iterations = 2)
closey = close.copy()

Result :

Of course, this one is not so good.

5. Finding Grid Points

res = cv2.bitwise_and(closex,closey)

Result :

6. Correcting the defects

Here, nikie does some kind of interpolation, about which I don’t have much knowledge. And i couldn’t find any corresponding function for this OpenCV. (may be it is there, i don’t know).

Check out this SOF which explains how to do this using SciPy, which I don’t want to use : Image transformation in OpenCV

So, here I took 4 corners of each sub-square and applied warp Perspective to each.

For that, first we find the centroids.

contour, hier = cv2.findContours(res,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
centroids = []
for cnt in contour:
    mom = cv2.moments(cnt)
    (x,y) = int(mom['m10']/mom['m00']), int(mom['m01']/mom['m00'])
    cv2.circle(img,(x,y),4,(0,255,0),-1)
    centroids.append((x,y))

But resulting centroids won’t be sorted. Check out below image to see their order:

So we sort them from left to right, top to bottom.

centroids = np.array(centroids,dtype = np.float32)
c = centroids.reshape((100,2))
c2 = c[np.argsort(c[:,1])]

b = np.vstack([c2[i*10:(i+1)*10][np.argsort(c2[i*10:(i+1)*10,0])] for i in xrange(10)])
bm = b.reshape((10,10,2))

Now see below their order :

Finally we apply the transformation and create a new image of size 450×450.

output = np.zeros((450,450,3),np.uint8)
for i,j in enumerate(b):
    ri = i/10
    ci = i%10
    if ci != 9 and ri!=9:
        src = bm[ri:ri+2, ci:ci+2 , :].reshape((4,2))
        dst = np.array( [ [ci*50,ri*50],[(ci+1)*50-1,ri*50],[ci*50,(ri+1)*50-1],[(ci+1)*50-1,(ri+1)*50-1] ], np.float32)
        retval = cv2.getPerspectiveTransform(src,dst)
        warp = cv2.warpPerspective(res2,retval,(450,450))
        output[ri*50:(ri+1)*50-1 , ci*50:(ci+1)*50-1] = warp[ri*50:(ri+1)*50-1 , ci*50:(ci+1)*50-1].copy()

Result :

The result is almost same as nikie’s, but code length is large. May be, better methods are available out there, but until then, this works OK.

Regards ARK.


回答 2

您可以尝试对任意扭曲使用某种基于网格的建模。而且,由于数独已经是一个网格,所以它应该不会太难。

因此,您可以尝试检测每个3×3子区域的边界,然后分别对每个区域进行变形。如果检测成功,它将为您提供更好的近似值。

You could try to use some kind of grid based modeling of you arbitrary warping. And since the sudoku already is a grid, that shouldn’t be too hard.

So you could try to detect the boundaries of each 3×3 subregion and then warp each region individually. If the detection succeeds it would give you a better approximation.


回答 3

我想补充一点,上述方法仅在数独板摆得比较正时才有效,否则高度/宽度(或反过来)的比率测试很可能会失败,您将无法检测到数独的边缘。(我还想补充一点,对于不垂直于图像边界的线,sobel 操作(dx 和 dy)仍然有效,因为这些线相对于两个轴都有边缘。)

为了能够检测直线,您应该进行轮廓分析或逐像素分析,例如ContourArea / boundingRectArea,左上角和右下角点…

编辑:我设法通过应用线性回归并检查误差,来判断一组轮廓是否构成一条直线。但是,当直线的斜率太大(即 >1000)或非常接近 0 时,线性回归的效果较差。因此,在线性回归之前先应用上面(得票最高的答案中)的比率测试是合理的,对我来说确实有效。

I want to add that above method works only when sudoku board stands straight, otherwise height/width (or vice versa) ratio test will most probably fail and you will not be able to detect edges of sudoku. (I also want to add that if lines that are not perpendicular to the image borders, sobel operations (dx and dy) will still work as lines will still have edges with respect to both axes.)

To be able to detect straight lines you should work on contour or pixel-wise analysis such as contourArea/boundingRectArea, top left and bottom right points…

Edit: I managed to check whether a set of contours form a line or not by applying linear regression and checking the error. However linear regression performed poorly when slope of the line is too big (i.e. >1000) or it is very close to 0. Therefore applying the ratio test above (in most upvoted answer) before linear regression is logical and did work for me.
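
As an illustration of that idea (not the answer's exact code), a set of points can be tested for collinearity with a least-squares fit and its residual; fitting against the axis with the larger spread sidesteps the steep-slope problem mentioned above:

import numpy as np

def looks_like_line(points, max_rms_error=2.0):
    """Rough collinearity test via least squares on the better-conditioned axis."""
    pts = np.asarray(points, dtype=np.float64)
    x, y = pts[:, 0], pts[:, 1]
    if np.ptp(x) >= np.ptp(y):          # wider in x: fit y as a function of x
        coeffs = np.polyfit(x, y, 1)
        residuals = y - np.polyval(coeffs, x)
    else:                               # taller in y: fit x as a function of y
        coeffs = np.polyfit(y, x, 1)
        residuals = x - np.polyval(coeffs, y)
    return float(np.sqrt(np.mean(residuals ** 2))) <= max_rms_error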


回答 4

为了处理未检测到的角点,我应用了伽玛值为 0.8 的伽玛校正。

绘制红色圆圈以显示缺少的角。

代码是:

gamma = 0.8
invGamma = 1/gamma
table = np.array([((i / 255.0) ** invGamma) * 255
                  for i in np.arange(0, 256)]).astype("uint8")
cv2.LUT(img, table, img)

如果缺少某些关键点,这是对Abid Rahman的回答的补充。

To remove undetected corners I applied gamma correction with a gamma value of 0.8.

The red circle is drawn to show the missing corner.

The code is:

gamma = 0.8
invGamma = 1/gamma
table = np.array([((i / 255.0) ** invGamma) * 255
                  for i in np.arange(0, 256)]).astype("uint8")
cv2.LUT(img, table, img)

This is in addition to Abid Rahman’s answer if some corner points are missing.


回答 5

我认为这是一个很棒的帖子,也是ARK的一个很好的解决方案。很好地布置和解释。

我正在研究类似的问题,并完成了整个工作。进行了一些更改(例如,xrange到range,cv2.findContours中的参数),但是应该可以立即使用(Python 3.5,Anaconda)。

这是上述元素的汇编,并添加了一些缺少的代码(即,标记点)。

'''

https://stackoverflow.com/questions/10196198/how-to-remove-convexity-defects-in-a-sudoku-square

'''

import cv2
import numpy as np

img = cv2.imread('test.png')

winname="raw image"
cv2.namedWindow(winname)
cv2.imshow(winname, img)
cv2.moveWindow(winname, 100,100)


img = cv2.GaussianBlur(img,(5,5),0)

winname="blurred"
cv2.namedWindow(winname)
cv2.imshow(winname, img)
cv2.moveWindow(winname, 100,150)

gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
mask = np.zeros((gray.shape),np.uint8)
kernel1 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(11,11))

winname="gray"
cv2.namedWindow(winname)
cv2.imshow(winname, gray)
cv2.moveWindow(winname, 100,200)

close = cv2.morphologyEx(gray,cv2.MORPH_CLOSE,kernel1)
div = np.float32(gray)/(close)
res = np.uint8(cv2.normalize(div,div,0,255,cv2.NORM_MINMAX))
res2 = cv2.cvtColor(res,cv2.COLOR_GRAY2BGR)

winname="res2"
cv2.namedWindow(winname)
cv2.imshow(winname, res2)
cv2.moveWindow(winname, 100,250)

 #find elements
thresh = cv2.adaptiveThreshold(res,255,0,1,19,2)
img_c, contour,hier = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)

max_area = 0
best_cnt = None
for cnt in contour:
    area = cv2.contourArea(cnt)
    if area > 1000:
        if area > max_area:
            max_area = area
            best_cnt = cnt

cv2.drawContours(mask,[best_cnt],0,255,-1)
cv2.drawContours(mask,[best_cnt],0,0,2)

res = cv2.bitwise_and(res,mask)

winname="puzzle only"
cv2.namedWindow(winname)
cv2.imshow(winname, res)
cv2.moveWindow(winname, 100,300)

# vertical lines
kernelx = cv2.getStructuringElement(cv2.MORPH_RECT,(2,10))

dx = cv2.Sobel(res,cv2.CV_16S,1,0)
dx = cv2.convertScaleAbs(dx)
cv2.normalize(dx,dx,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dx,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernelx,iterations = 1)

img_d, contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
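# keep only tall, thin contours (h/w > 5), i.e. the vertical grid lines; erase the rest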
for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if h/w > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)
close = cv2.morphologyEx(close,cv2.MORPH_CLOSE,None,iterations = 2)
closex = close.copy()

winname="vertical lines"
cv2.namedWindow(winname)
cv2.imshow(winname, img_d)
cv2.moveWindow(winname, 100,350)

# find horizontal lines
kernely = cv2.getStructuringElement(cv2.MORPH_RECT,(10,2))
dy = cv2.Sobel(res,cv2.CV_16S,0,2)
dy = cv2.convertScaleAbs(dy)
cv2.normalize(dy,dy,0,255,cv2.NORM_MINMAX)
ret,close = cv2.threshold(dy,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
close = cv2.morphologyEx(close,cv2.MORPH_DILATE,kernely)

img_e, contour, hier = cv2.findContours(close,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
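# keep only wide, flat contours (w/h > 5), i.e. the horizontal grid lines; erase the rest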

for cnt in contour:
    x,y,w,h = cv2.boundingRect(cnt)
    if w/h > 5:
        cv2.drawContours(close,[cnt],0,255,-1)
    else:
        cv2.drawContours(close,[cnt],0,0,-1)

close = cv2.morphologyEx(close,cv2.MORPH_DILATE,None,iterations = 2)
closey = close.copy()

winname="horizontal lines"
cv2.namedWindow(winname)
cv2.imshow(winname, img_e)
cv2.moveWindow(winname, 100,400)


# intersection of these two gives dots
res = cv2.bitwise_and(closex,closey)

winname="intersections"
cv2.namedWindow(winname)
cv2.imshow(winname, res)
cv2.moveWindow(winname, 100,450)

# text green (BGR)
textcolor=(0,255,0)
# points blue (BGR)
pointcolor=(255,0,0)

# find centroids and sort
img_f, contour, hier = cv2.findContours(res,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
centroids = []
for cnt in contour:
    mom = cv2.moments(cnt)
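    # centroid of the blob from its spatial moments: (m10/m00, m01/m00)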
    (x,y) = int(mom['m10']/mom['m00']), int(mom['m01']/mom['m00'])
    cv2.circle(img,(x,y),4,(0,255,0),-1)
    centroids.append((x,y))

# sorting
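# top-to-bottom by y, then left-to-right within each row of 10
# (assumes exactly 100 grid intersections were detected)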
centroids = np.array(centroids,dtype = np.float32)
c = centroids.reshape((100,2))
c2 = c[np.argsort(c[:,1])]

b = np.vstack([c2[i*10:(i+1)*10][np.argsort(c2[i*10:(i+1)*10,0])] for i in range(10)])
bm = b.reshape((10,10,2))

# make copy
labeled_in_order=res2.copy()

for index, pt in enumerate(b):
    pt = (int(pt[0]), int(pt[1]))  # OpenCV drawing functions expect integer coordinates
    cv2.putText(labeled_in_order,str(index),pt,cv2.FONT_HERSHEY_DUPLEX, 0.75, textcolor)
    cv2.circle(labeled_in_order, pt, 5, pointcolor)

winname="labeled in order"
cv2.namedWindow(winname)
cv2.imshow(winname, labeled_in_order)
cv2.moveWindow(winname, 100,500)

# create final

output = np.zeros((450,450,3),np.uint8)
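# for each cell, map the quad formed by its four surrounding grid points onto an
# ideal 50x50 square and paste that patch into the flattened output image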
for i,j in enumerate(b):
    ri = int(i/10) # row index
    ci = i%10 # column index
    if ci != 9 and ri!=9:
        src = bm[ri:ri+2, ci:ci+2 , :].reshape((4,2))
        dst = np.array( [ [ci*50,ri*50],[(ci+1)*50-1,ri*50],[ci*50,(ri+1)*50-1],[(ci+1)*50-1,(ri+1)*50-1] ], np.float32)
        retval = cv2.getPerspectiveTransform(src,dst)
        warp = cv2.warpPerspective(res2,retval,(450,450))
        output[ri*50:(ri+1)*50-1 , ci*50:(ci+1)*50-1] = warp[ri*50:(ri+1)*50-1 , ci*50:(ci+1)*50-1].copy()

winname="final"
cv2.namedWindow(winname)
cv2.imshow(winname, output)
cv2.moveWindow(winname, 600,100)

cv2.waitKey(0)
cv2.destroyAllWindows()
