First, what happens when T0 is not None? I would test that, then I would adjust the values I pass to plt.subplot(); maybe try values 131, 132, and 133, or values that depend whether or not T0 exists.
Second, after plt.show() is called, a new figure is created. To deal with this, you can
Call plt.savefig('tessstttyyy.png', dpi=100) before you call plt.show()
Save the figure before you show() by calling plt.gcf() for “get current figure”, then you can call savefig() on this Figure object at any time.
import matplotlib.pyplot as pltimport matplotlib.patches as patchesfrom PIL importImageimport numpy as np
im = np.array(Image.open('stinkbug.png'), dtype=np.uint8)# Create figure and axes
fig,ax = plt.subplots(1)# Display the image
ax.imshow(im)# Create a Rectangle patch
rect = patches.Rectangle((50,100),40,30,linewidth=1,edgecolor='r',facecolor='none')# Add the patch to the Axes
ax.add_patch(rect)
plt.show()
You can add a Rectangle patch to the matplotlib Axes.
For example (using the image from the tutorial here):
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import numpy as np
im = np.array(Image.open('stinkbug.png'), dtype=np.uint8)
# Create figure and axes
fig,ax = plt.subplots(1)
# Display the image
ax.imshow(im)
# Create a Rectangle patch
rect = patches.Rectangle((50,100),40,30,linewidth=1,edgecolor='r',facecolor='none')
# Add the patch to the Axes
ax.add_patch(rect)
plt.show()
import matplotlib.pyplot as pltfrom matplotlib.patches importRectanglefrom PIL importImage
im =Image.open('stinkbug.png')# Display the image
plt.imshow(im)# Get the current reference
ax = plt.gca()# Create a Rectangle patch
rect =Rectangle((50,100),40,30,linewidth=1,edgecolor='r',facecolor='none')# Add the patch to the Axes
ax.add_patch(rect)
或者,简短版本:
import matplotlib.pyplot as pltfrom matplotlib.patches importRectanglefrom PIL importImage# Display the image
plt.imshow(Image.open('stinkbug.png'))# Add the patch to the Axes
plt.gca().add_patch(Rectangle((50,100),40,30,linewidth=1,edgecolor='r',facecolor='none'))
There is no need for subplots, and pyplot can display PIL images, so this can be simplified further:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image
im = Image.open('stinkbug.png')
# Display the image
plt.imshow(im)
# Get the current reference
ax = plt.gca()
# Create a Rectangle patch
rect = Rectangle((50,100),40,30,linewidth=1,edgecolor='r',facecolor='none')
# Add the patch to the Axes
ax.add_patch(rect)
Or, the short version:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image
# Display the image
plt.imshow(Image.open('stinkbug.png'))
# Add the patch to the Axes
plt.gca().add_patch(Rectangle((50,100),40,30,linewidth=1,edgecolor='r',facecolor='none'))
This should give you a dictionary indexed by EXIF numeric tags. If you want the dictionary indexed by the actual EXIF tag name strings, try something like:
import PIL.ExifTags
exif = {
PIL.ExifTags.TAGS[k]: v
for k, v in img._getexif().items()
if k in PIL.ExifTags.TAGS
}
import os,sys
from PIL import Image
from PIL.ExifTags import TAGS
for (k,v) in Image.open(sys.argv[1])._getexif().items():
print('%s = %s' % (TAGS.get(k), v))
or to get a specific field:
def get_field (exif,field) :
for (k,v) in exif.items():
if TAGS.get(k) == field:
return v
exif = image._getexif()
print get_field(exif,'ExposureTime')
For Python3.x and starting Pillow==6.0.0, Image objects now provide a getexif() method that returns <class 'PIL.Image.Exif'> or None if the image has no EXIF data.
getexif() has been added, which returns an Exif instance. Values can
be retrieved and set like a dictionary. When saving JPEG, PNG or WEBP,
the instance can be passed as an exif argument to include any changes
in the output image.
As stated, the Exif output can simply be casted to a dict with the EXIF data accessible as regular key-value pairs. The keys are 16-bit integers that can be mapped to their string names using the ExifTags.TAGS module.
from PIL import Image, ExifTags
img = Image.open("sample.jpg")
img_exif = img.getexif()
print(type(img_exif))
# <class 'PIL.Image.Exif'>
if img_exif is None:
print("Sorry, image has no exif data.")
else:
img_exif_dict = dict(img_exif)
print(img_exif_dict)
# { ... 42035: 'FUJIFILM', 42036: 'XF23mmF2 R WR', 42037: '75A14188' ... }
for key, val in img_exif_dict.items():
if key in ExifTags.TAGS:
print(f"{ExifTags.TAGS[key]}:{repr(val)}")
# ExifVersion:b'0230'
# ...
# FocalLength:(2300, 100)
# ColorSpace:1
# FocalLengthIn35mmFilm:35
# ...
# Model:'X-T2'
# Make:'FUJIFILM'
# ...
# DateTime:'2019:12:01 21:30:07'
# ...
Tested with Python 3.6.8 and Pillow==6.0.0.
回答 4
import sys
import PIL
import PIL.ImageasPILimagefrom PIL importImageDraw,ImageFont,ImageEnhancefrom PIL.ExifTagsimport TAGS, GPSTAGS
classWorker(object):def __init__(self, img):
self.img = img
self.exif_data = self.get_exif_data()
self.lat = self.get_lat()
self.lon = self.get_lon()
self.date =self.get_date_time()
super(Worker, self).__init__()@staticmethoddef get_if_exist(data, key):if key in data:return data[key]returnNone@staticmethoddef convert_to_degress(value):"""Helper function to convert the GPS coordinates
stored in the EXIF to degress in float format"""
d0 = value[0][0]
d1 = value[0][1]
d = float(d0)/ float(d1)
m0 = value[1][0]
m1 = value[1][1]
m = float(m0)/ float(m1)
s0 = value[2][0]
s1 = value[2][1]
s = float(s0)/ float(s1)return d +(m /60.0)+(s /3600.0)def get_exif_data(self):"""Returns a dictionary from the exif data of an PIL Image item. Also
converts the GPS Tags"""
exif_data ={}
info = self.img._getexif()if info:for tag, value in info.items():
decoded = TAGS.get(tag, tag)if decoded =="GPSInfo":
gps_data ={}for t in value:
sub_decoded = GPSTAGS.get(t, t)
gps_data[sub_decoded]= value[t]
exif_data[decoded]= gps_data
else:
exif_data[decoded]= value
return exif_data
def get_lat(self):"""Returns the latitude and longitude, if available, from the
provided exif_data (obtained through get_exif_data above)"""# print(exif_data)if'GPSInfo'in self.exif_data:
gps_info = self.exif_data["GPSInfo"]
gps_latitude = self.get_if_exist(gps_info,"GPSLatitude")
gps_latitude_ref = self.get_if_exist(gps_info,'GPSLatitudeRef')if gps_latitude and gps_latitude_ref:
lat = self.convert_to_degress(gps_latitude)if gps_latitude_ref !="N":
lat =0- lat
lat = str(f"{lat:.{5}f}")return lat
else:returnNonedef get_lon(self):"""Returns the latitude and longitude, if available, from the
provided exif_data (obtained through get_exif_data above)"""# print(exif_data)if'GPSInfo'in self.exif_data:
gps_info = self.exif_data["GPSInfo"]
gps_longitude = self.get_if_exist(gps_info,'GPSLongitude')
gps_longitude_ref = self.get_if_exist(gps_info,'GPSLongitudeRef')if gps_longitude and gps_longitude_ref:
lon = self.convert_to_degress(gps_longitude)if gps_longitude_ref !="E":
lon =0- lon
lon = str(f"{lon:.{5}f}")return lon
else:returnNonedef get_date_time(self):if'DateTime'in self.exif_data:
date_and_time = self.exif_data['DateTime']return date_and_time
if __name__ =='__main__':try:
img =PILimage.open(sys.argv[1])
image =Worker(img)
lat = image.lat
lon = image.lon
date = image.date
print(date, lat, lon)exceptExceptionas e:print(e)
import sys
import PIL
import PIL.Image as PILimage
from PIL import ImageDraw, ImageFont, ImageEnhance
from PIL.ExifTags import TAGS, GPSTAGS
class Worker(object):
def __init__(self, img):
self.img = img
self.exif_data = self.get_exif_data()
self.lat = self.get_lat()
self.lon = self.get_lon()
self.date =self.get_date_time()
super(Worker, self).__init__()
@staticmethod
def get_if_exist(data, key):
if key in data:
return data[key]
return None
@staticmethod
def convert_to_degress(value):
"""Helper function to convert the GPS coordinates
stored in the EXIF to degress in float format"""
d0 = value[0][0]
d1 = value[0][1]
d = float(d0) / float(d1)
m0 = value[1][0]
m1 = value[1][1]
m = float(m0) / float(m1)
s0 = value[2][0]
s1 = value[2][1]
s = float(s0) / float(s1)
return d + (m / 60.0) + (s / 3600.0)
def get_exif_data(self):
"""Returns a dictionary from the exif data of an PIL Image item. Also
converts the GPS Tags"""
exif_data = {}
info = self.img._getexif()
if info:
for tag, value in info.items():
decoded = TAGS.get(tag, tag)
if decoded == "GPSInfo":
gps_data = {}
for t in value:
sub_decoded = GPSTAGS.get(t, t)
gps_data[sub_decoded] = value[t]
exif_data[decoded] = gps_data
else:
exif_data[decoded] = value
return exif_data
def get_lat(self):
"""Returns the latitude and longitude, if available, from the
provided exif_data (obtained through get_exif_data above)"""
# print(exif_data)
if 'GPSInfo' in self.exif_data:
gps_info = self.exif_data["GPSInfo"]
gps_latitude = self.get_if_exist(gps_info, "GPSLatitude")
gps_latitude_ref = self.get_if_exist(gps_info, 'GPSLatitudeRef')
if gps_latitude and gps_latitude_ref:
lat = self.convert_to_degress(gps_latitude)
if gps_latitude_ref != "N":
lat = 0 - lat
lat = str(f"{lat:.{5}f}")
return lat
else:
return None
def get_lon(self):
"""Returns the latitude and longitude, if available, from the
provided exif_data (obtained through get_exif_data above)"""
# print(exif_data)
if 'GPSInfo' in self.exif_data:
gps_info = self.exif_data["GPSInfo"]
gps_longitude = self.get_if_exist(gps_info, 'GPSLongitude')
gps_longitude_ref = self.get_if_exist(gps_info, 'GPSLongitudeRef')
if gps_longitude and gps_longitude_ref:
lon = self.convert_to_degress(gps_longitude)
if gps_longitude_ref != "E":
lon = 0 - lon
lon = str(f"{lon:.{5}f}")
return lon
else:
return None
def get_date_time(self):
if 'DateTime' in self.exif_data:
date_and_time = self.exif_data['DateTime']
return date_and_time
if __name__ == '__main__':
try:
img = PILimage.open(sys.argv[1])
image = Worker(img)
lat = image.lat
lon = image.lon
date = image.date
print(date, lat, lon)
except Exception as e:
print(e)
I have found that using ._getexif doesn’t work in higher python versions, moreover, it is a protected class and one should avoid using it if possible.
After digging around the debugger this is what I found to be the best way to get the EXIF data for an image:
from PIL import Image
def get_exif(path):
return Image.open(path).info['parsed_exif']
This returns a dictionary of all the EXIF data of an image.
Note: For Python3.x use Pillow instead of PIL
回答 6
这是一个可能更容易阅读的内容。希望这会有所帮助。
from PIL importImagefrom PIL importExifTags
exifData ={}
img =Image.open(picture.jpg)
exifDataRaw = img._getexif()for tag, value in exifDataRaw.items():
decodedTag =ExifTags.TAGS.get(tag, tag)
exifData[decodedTag]= value
Here’s the one that may be little easier to read. Hope this is helpful.
from PIL import Image
from PIL import ExifTags
exifData = {}
img = Image.open(picture.jpg)
exifDataRaw = img._getexif()
for tag, value in exifDataRaw.items():
decodedTag = ExifTags.TAGS.get(tag, tag)
exifData[decodedTag] = value
# In Python 2.7
fh = open("imageToSave.png","wb")
fh.write(img_data.decode('base64'))
fh.close()# or, more concisely using with statementwith open("imageToSave.png","wb")as fh:
fh.write(img_data.decode('base64'))
Decoded the data using the base64 codec, and then write it to the filesystem.
# In Python 2.7
fh = open("imageToSave.png", "wb")
fh.write(img_data.decode('base64'))
fh.close()
# or, more concisely using with statement
with open("imageToSave.png", "wb") as fh:
fh.write(img_data.decode('base64'))
Modernizing this example to Python 3, which removed arbitrary codec support from string/bytes .encode() and .decode() functions:
# For both Python 2.7 and Python 3.x
import base64
with open("imageToSave.png", "wb") as fh:
fh.write(base64.decodebytes(img_data))
回答 1
如果imagestr是位图数据(我们现在知道不是),则可以使用它
imagestr是base64编码的字符串 width是图像的宽度是图像 height的高度
from PIL importImagefrom base64 import decodestring
image =Image.fromstring('RGB',(width,height),decodestring(imagestr))
image.save("foo.png")
由于imagestr只是编码的png数据
from base64 import decodestring
with open("foo.png","wb")as f:
f.write(decodestring(imagestr))
If the imagestr was bitmap data (which we now know it isn’t) you could use this
imagestr is the base64 encoded string width is the width of the image height is the height of the image
from PIL import Image
from base64 import decodestring
image = Image.fromstring('RGB',(width,height),decodestring(imagestr))
image.save("foo.png")
Since the imagestr is just the encoded png data
from base64 import decodestring
with open("foo.png","wb") as f:
f.write(decodestring(imagestr))
回答 2
您也可以将其保存到字符串缓冲区,然后根据需要进行操作,
import cStringIO
data = json.loads(request.POST['imgData'])# Getting the object from the post request
image_output = cStringIO.StringIO()
image_output.write(data.decode('base64'))# Write decoded image to buffer
image_output.seek(0)# seek beginning of the image string# image_output.read() # Do as you wish with it!
在django中,您可以将其另存为上传文件以保存到模型中:
from django.core.files.uploadedfile importSimpleUploadedFile
suf =SimpleUploadedFile('uploaded_file.png', image_output.read(), content_type='image/png')
You can also save it to a string buffer and then do as you wish with it,
import cStringIO
data = json.loads(request.POST['imgData']) # Getting the object from the post request
image_output = cStringIO.StringIO()
image_output.write(data.decode('base64')) # Write decoded image to buffer
image_output.seek(0) # seek beginning of the image string
# image_output.read() # Do as you wish with it!
In django, you can save it as an uploaded file to save to a model:
from django.core.files.uploadedfile import SimpleUploadedFile
suf = SimpleUploadedFile('uploaded_file.png', image_output.read(), content_type='image/png')
You could probably use the PyPNG package’s png.Reader object to do this – decode the base64 string into a regular string (via the base64 standard library), and pass it to the constructor.
回答 6
试试这个解决方案,
图像文件->二进制编码的字符串
二进制编码的字符串->图像文件
import base64
"""
1st step - convert image into binary
"""with open("original_image.png","rb")as original_file:
encoded_string = base64.b64encode(original_file.read())print(encoded_string)# xmzWowsfJbpGwCe0DTveqwvos7Mf0lcVNe/Q+G1hO/p+UNPd/stUse8AhP/3fDixf8HI3No67nvhlYAAAAASUVORK5CYII='print(type(encoded_string))# <class 'bytes'>"""
2nd step - create new image using the encoded string
"""with open("new_image.png","wb")as new_file:
new_file.write(base64.decodebytes(encoded_string))
I understand that you can get the image size using PIL in the following fashion
from PIL import Image
im = Image.open(image_filename)
width, height = im.size
However, I would like to get the image width and height without having to load the image in memory. Is that possible? I am only doing statistics on image sizes and dont care for the image contents. I just want to make my processing faster.
whileTrue:
s = s + self.fp.read(1)
i = i16(s)if i in MARKER:
name, description, handler = MARKER[i]# print hex(i), name, descriptionif handler isnotNone:
handler(self, i)if i ==0xFFDA:# start of scan
rawmode = self.mode
if self.mode =="CMYK":
rawmode ="CMYK;I"# assume adobe conventions
self.tile =[("jpeg",(0,0)+ self.size,0,(rawmode,""))]# self.__offset = self.fp.tell()break
s = self.fp.read(1)elif i ==0or i ==65535:# padded marker or junk; move on
s ="\xff"else:raiseSyntaxError("no marker found")
As the comments allude, PIL does not load the image into memory when calling .open. Looking at the docs of PIL 1.1.7, the docstring for .open says:
def open(fp, mode="r"):
"Open an image file, without loading the raster data"
There are a few file operations in the source like:
...
prefix = fp.read(16)
...
fp.seek(0)
...
but these hardly constitute reading the whole file. In fact .open simply returns a file object and the filename on success. In addition the docs say:
open(file, mode=”r”)
Opens and identifies the given image file.
This is a lazy operation; this function identifies the file, but the actual image data is not read from the file until you try to process the data (or call the load method).
Digging deeper, we see that .open calls _open which is a image-format specific overload. Each of the implementations to _open can be found in a new file, eg. .jpeg files are in JpegImagePlugin.py. Let’s look at that one in depth.
Here things seem to get a bit tricky, in it there is an infinite loop that gets broken out of when the jpeg marker is found:
while True:
s = s + self.fp.read(1)
i = i16(s)
if i in MARKER:
name, description, handler = MARKER[i]
# print hex(i), name, description
if handler is not None:
handler(self, i)
if i == 0xFFDA: # start of scan
rawmode = self.mode
if self.mode == "CMYK":
rawmode = "CMYK;I" # assume adobe conventions
self.tile = [("jpeg", (0,0) + self.size, 0, (rawmode, ""))]
# self.__offset = self.fp.tell()
break
s = self.fp.read(1)
elif i == 0 or i == 65535:
# padded marker or junk; move on
s = "\xff"
else:
raise SyntaxError("no marker found")
Which looks like it could read the whole file if it was malformed. If it reads the info marker OK however, it should break out early. The function handler ultimately sets self.size which are the dimensions of the image.
回答 1
如果您不关心图像内容,则PIL可能是一个过大的选择。
我建议解析python magic模块的输出:
>>> t = magic.from_file('teste.png')>>> t
'PNG image data, 782 x 602, 8-bit/color RGBA, non-interlaced'>>> re.search('(\d+) x (\d+)', t).groups()('782','602')
I often fetch image sizes on the Internet. Of course, you can’t download the image and then load it to parse the information. It’s too time consuming. My method is to feed chunks to an image container and test whether it can parse the image every time. Stop the loop when I get the information I want.
I extracted the core of my code and modified it to parse local files.
from PIL import ImageFile
ImPar=ImageFile.Parser()
with open(r"D:\testpic\test.jpg", "rb") as f:
ImPar=ImageFile.Parser()
chunk = f.read(2048)
count=2048
while chunk != "":
ImPar.feed(chunk)
if ImPar.image:
break
chunk = f.read(2048)
count+=2048
print(ImPar.image.size)
print(count)
Output:
(2240, 1488)
38912
The actual file size is 1,543,580 bytes and you only read 38,912 bytes to get the image size. Hope this will help.
Another short way of doing it on Unix systems. It depends on the output of file which I am not sure is standardized on all systems. This should probably not be used in production code. Moreover most JPEGs don’t report the image size.
The ‘Image.fromarray’ is a little ugly because I clip incoming data to [0,255], convert to bytes, then create a grayscale image. I mostly work in gray.
import numpy as np
def img_as_array(im):"""OpenCV's native format to a numpy array view"""
w, h, n = im.width, im.height, im.channels
modes ={1:"L",3:"RGB",4:"RGBA"}if n notin modes:raiseException('unsupported number of channels: {0}'.format(n))
out = np.asarray(im)if n !=1:
out = out[:,:,::-1]# BGR -> RGB conversionreturn out
OpenCV image format supports the numpy array interface. A helper function can be made to support either grayscale or color images. This means the BGR -> RGB conversion can be conveniently done with a numpy slice, not a full copy of image data.
Note: this is a stride trick, so modifying the output array will also change the OpenCV image data. If you want a copy, use .copy() method on the array!
import numpy as np
def img_as_array(im):
"""OpenCV's native format to a numpy array view"""
w, h, n = im.width, im.height, im.channels
modes = {1: "L", 3: "RGB", 4: "RGBA"}
if n not in modes:
raise Exception('unsupported number of channels: {0}'.format(n))
out = np.asarray(im)
if n != 1:
out = out[:, :, ::-1] # BGR -> RGB conversion
return out
回答 8
我也采用了imageio,但发现以下机器可用于预处理和后期处理:
import imageio
import numpy as np
def imload(*a,**k):
i = imageio.imread(*a,**k)
i = i.transpose((1,0,2))# x and y are mixed up for some reason...
i = np.flip(i,1)# make coordinate system right-handed!!!!!!return i/255def imsave(i, url,*a,**k):# Original order of arguments was counterintuitive. It should# read verbally "Save the image to the URL" — not "Save to the# URL the image."
i = np.flip(i,1)
i = i.transpose((1,0,2))
i *=255
i = i.round()
i = np.maximum(i,0)
i = np.minimum(i,255)
i = np.asarray(i, dtype=np.uint8)
imageio.imwrite(url, i,*a,**k)
I also adopted imageio, but I found the following machinery useful for pre- and post-processing:
import imageio
import numpy as np
def imload(*a, **k):
i = imageio.imread(*a, **k)
i = i.transpose((1, 0, 2)) # x and y are mixed up for some reason...
i = np.flip(i, 1) # make coordinate system right-handed!!!!!!
return i/255
def imsave(i, url, *a, **k):
# Original order of arguments was counterintuitive. It should
# read verbally "Save the image to the URL" — not "Save to the
# URL the image."
i = np.flip(i, 1)
i = i.transpose((1, 0, 2))
i *= 255
i = i.round()
i = np.maximum(i, 0)
i = np.minimum(i, 255)
i = np.asarray(i, dtype=np.uint8)
imageio.imwrite(url, i, *a, **k)
The rationale is that I am using numpy for image processing, not just image displaying. For this purpose, uint8s are awkward, so I convert to floating point values ranging from 0 to 1.
When saving images, I noticed I had to cut the out-of-range values myself, or else I ended up with a really gray output. (The gray output was the result of imageio compressing the full range, which was outside of [0, 256), to values that were inside the range.)
There were a couple other oddities, too, which I mentioned in the comments.
回答 9
您可以使用numpy和轻松获得RGB图片的numpy数组Image from PIL
import numpy as np
from PIL importImageimport matplotlib.pyplot as plt
im =Image.open('*image_name*')#These two lines
im_arr = np.array(im)#are all you need
plt.imshow(im_arr)#Just to verify that image array has been constructed properly
You can get numpy array of rgb image easily by using numpy and Image from PIL
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
im = Image.open('*image_name*') #These two lines
im_arr = np.array(im) #are all you need
plt.imshow(im_arr) #Just to verify that image array has been constructed properly
回答 10
使用以下语法加载图像:
from keras.preprocessing import image
X_test=image.load_img('four.png',target_size=(28,28),color_mode="grayscale");#loading image and then convert it into grayscale and with it's target size
X_test=image.img_to_array(X_test);#convert image into array
from keras.preprocessing import image
X_test=image.load_img('four.png',target_size=(28,28),color_mode="grayscale"); #loading image and then convert it into grayscale and with it's target size
X_test=image.img_to_array(X_test); #convert image into array
How to get the size of an image in cv2 wrapper in Python OpenCV (numpy). Is there a correct way to do that other than numpy.shape(). How can I get it in these format dimensions: (width, height) list?
cv2 uses numpy for manipulating images, so the proper and best way to get the size of an image is using numpy.shape. Assuming you are working with BGR images, here is an example:
I’m afraid there is no “better” way to get this size, however it’s not that much pain.
Of course your code should be safe for both binary/mono images as well as multi-channel ones, but the principal dimensions of the image always come first in the numpy array’s shape. If you opt for readability, or don’t want to bother typing this, you can wrap it up in a function, and give it a name you like, e.g. cv_size:
Writing functions with def is not fun while working interactively.
Edit
Originally I thought that using [:2] was OK, but the numpy shape is (height, width[, depth]), and we need (width, height), as e.g. cv2.resize expects, so – we must use [1::-1]. Even less memorable than [:2]. And who remembers reverse slicing anyway?
In python code, how to efficiently save a certain page in a pdf as a jpeg file? (Use case: I’ve a python flask web server where pdf-s will be uploaded and jpeg-s corresponding to each page is stores.)
This solution is close, but the problem is that it does not convert the entire page to jpeg.
回答 0
可以使用pdf2image库。
您可以使用以下方法简单地安装它:
pip install pdf2image
安装后,您可以使用以下代码获取图像。
from pdf2image import convert_from_path
pages = convert_from_path('pdf_file',500)
Once installed you can use following code to get images.
from pdf2image import convert_from_path
pages = convert_from_path('pdf_file', 500)
Saving pages in jpeg format
for page in pages:
page.save('out.jpg', 'JPEG')
Edit: the Github repo pdf2image also mentions that it uses pdftoppm and that it requires other installations:
pdftoppm is the piece of software that does the actual magic. It is distributed as part of a greater package called poppler.
Windows users will have to install poppler for Windows.
Mac users will have to install poppler for Mac.
Linux users will have pdftoppm pre-installed with the distro (Tested on Ubuntu and Archlinux) if it’s not, run sudo apt install poppler-utils.
You can install the latest version under Windows using anaconda by doing:
The Python library pdf2image (used in the other answer) in fact doesn’t do much more than just launchingpdttoppm with subprocess.Popen, so here is a short version doing it directly:
from wand.image importImage
f ="somefile.pdf"with(Image(filename=f, resolution=120))as source:for i, image in enumerate(source.sequence):
newfilename = f[:-4]+ str(i +1)+'.jpeg'Image(image).save(filename=newfilename)
There is no need to install Poppler on your OS. This will work:
pip install Wand
from wand.image import Image
f = "somefile.pdf"
with(Image(filename=f, resolution=120)) as source:
for i, image in enumerate(source.sequence):
newfilename = f[:-4] + str(i + 1) + '.jpeg'
Image(image).save(filename=newfilename)
@gaurwraith, install poppler for Windows and use pdftoppm.exe as follows:
Download zip file with Poppler’s latest binaries/dlls from http://blog.alivate.com.au/poppler-windows/ and unzip to a new folder in your program files folder. For example: “C:\Program Files (x86)\Poppler”.
Add “C:\Program Files (x86)\Poppler\poppler-0.68.0\bin” to your SYSTEM PATH environment variable.
From cmd line install pdf2image module -> “pip install pdf2image”.
Or alternatively, directly execute pdftoppm.exe from your code using Python’s subprocess module as explained by user Basj.
@vishvAs vAsuki, this code should generate the jpgs you want through the subprocess module for all pages of one or more pdfs in a given folder:
import os, subprocess
pdf_dir = r"C:\yourPDFfolder"
os.chdir(pdf_dir)
pdftoppm_path = r"C:\Program Files (x86)\Poppler\poppler-0.68.0\bin\pdftoppm.exe"
for pdf_file in os.listdir(pdf_dir):
if pdf_file.endswith(".pdf"):
subprocess.Popen('"%s" -jpeg %s out' % (pdftoppm_path, pdf_file))
Or using the pdf2image module:
import os
from pdf2image import convert_from_path
pdf_dir = r"C:\yourPDFfolder"
os.chdir(pdf_dir)
for pdf_file in os.listdir(pdf_dir):
if pdf_file.endswith(".pdf"):
pages = convert_from_path(pdf_file, 300)
pdf_file = pdf_file[:-4]
for page in pages:
page.save("%s-page%d.jpg" % (pdf_file,pages.index(page)), "JPEG")
from pdf2jpg import pdf2jpg
inputpath = r"D:\inputdir\pdf1.pdf"
outputpath = r"D:\outputdir"# To convert single page
result = pdf2jpg.convert_pdf2jpg(inputpath, outputpath, pages="1")print(result)# To convert multiple pages
result = pdf2jpg.convert_pdf2jpg(inputpath, outputpath, pages="1,0,3")print(result)# to convert all pages
result = pdf2jpg.convert_pdf2jpg(inputpath, outputpath, pages="ALL")print(result)
from pdf2jpg import pdf2jpg
inputpath = r"D:\inputdir\pdf1.pdf"
outputpath = r"D:\outputdir"
# To convert single page
result = pdf2jpg.convert_pdf2jpg(inputpath, outputpath, pages="1")
print(result)
# To convert multiple pages
result = pdf2jpg.convert_pdf2jpg(inputpath, outputpath, pages="1,0,3")
print(result)
# to convert all pages
result = pdf2jpg.convert_pdf2jpg(inputpath, outputpath, pages="ALL")
print(result)
cd $dir
for f in *.pdf
do
if [ -f "${f}" ]; then
n=$(echo "$f" | cut -f1 -d'.')
pdftoppm -scale-to 1440 -png $f $conv/$n
rm $f
mv $conv/*.png $dir
fi
done
This is a small part of a bash script in a loop for the use of a narrow casting device.
Checks every 5 seconds on added pdf files (all) and processes them.
This is for a demo device, at the end converting will be done at a remote server. Converting to .PNG now, but .JPG is possible too.
This converting, together with transitions on A4 format, displaying a video, two smooth scrolling texts and a logo (with transition in three versions) sets the Pi3 to allmost 4x 100% cpu-load ;-)
回答 8
from pdf2image import convert_from_path
import glob
pdf_dir = glob.glob(r'G:\personal\pdf\*')#your pdf folder path
img_dir ="G:\\personal\\img\\"#your dest img pathfor pdf_ in pdf_dir:
pages = convert_from_path(pdf_,500)for page in pages:
page.save(img_dir+pdf_.split("\\")[-1][:-3]+"jpg",'JPEG')
from pdf2image import convert_from_path
import glob
pdf_dir = glob.glob(r'G:\personal\pdf\*') #your pdf folder path
img_dir = "G:\\personal\\img\\" #your dest img path
for pdf_ in pdf_dir:
pages = convert_from_path(pdf_, 500)
for page in pages:
page.save(img_dir+pdf_.split("\\")[-1][:-3]+"jpg", 'JPEG')
I would like to take an image and change the scale of the image, while it is a numpy array.
For example I have this image of a coca-cola bottle:
bottle-1
Which translates to a numpy array of shape (528, 203, 3) and I want to resize that to say the size of this second image:
bottle-2
Which has a shape of (140, 54, 3).
How do I change the size of the image to a certain shape while still maintaining the original image? Other answers suggest stripping every other or third row out, but what I want to do is basically shrink the image how you would via an image editor but in python code. Are there any libraries to do this in numpy/SciPy?
Yeah, you can install opencv (this is a library used for image processing, and computer vision), and use the cv2.resize function. And for instance use:
import cv2
import numpy as np
img = cv2.imread('your_image.jpg')
res = cv2.resize(img, dsize=(54, 140), interpolation=cv2.INTER_CUBIC)
Here img is thus a numpy array containing the original image, whereas res is a numpy array containing the resized image. An important aspect is the interpolation parameter: there are several ways how to resize an image. Especially since you scale down the image, and the size of the original image is not a multiple of the size of the resized image. Possible interpolation schemas are:
INTER_NEAREST – a nearest-neighbor interpolation
INTER_LINEAR – a bilinear interpolation (used by default)
INTER_AREA – resampling using pixel area relation. It may be a preferred method for image decimation, as it gives moire’-free
results. But when the image is zoomed, it is similar to the
INTER_NEAREST method.
INTER_CUBIC – a bicubic interpolation over 4×4 pixel neighborhood
INTER_LANCZOS4 – a Lanczos interpolation over 8×8 pixel neighborhood
Like with most options, there is no “best” option in the sense that for every resize schema, there are scenarios where one strategy can be preferred over another.
While it might be possible to use numpy alone to do this, the operation is not built-in. That said, you can use scikit-image (which is built on numpy) to do this kind of image manipulation.
For people coming here from Google looking for a fast way to downsample images in numpy arrays for use in Machine Learning applications, here’s a super fast method (adapted from here ). This method only works when the input dimensions are a multiple of the output dimensions.
The following examples downsample from 128×128 to 64×64 (this can be easily changed).
Channels last ordering
# large image is shape (128, 128, 3)
# small image is shape (64, 64, 3)
input_size = 128
output_size = 64
bin_size = input_size // output_size
small_image = large_image.reshape((output_size, bin_size,
output_size, bin_size, 3)).max(3).max(1)
Channels first ordering
# large image is shape (3, 128, 128)
# small image is shape (3, 64, 64)
input_size = 128
output_size = 64
bin_size = input_size // output_size
small_image = large_image.reshape((3, output_size, bin_size,
output_size, bin_size)).max(4).max(2)
For grayscale images just change the 3 to a 1 like this:
Channels first ordering
# large image is shape (1, 128, 128)
# small image is shape (1, 64, 64)
input_size = 128
output_size = 64
bin_size = input_size // output_size
small_image = large_image.reshape((1, output_size, bin_size,
output_size, bin_size)).max(4).max(2)
This method uses the equivalent of max pooling. It’s the fastest way to do this that I’ve found.
#simple image scaling to (nR x nC) sizedef scale(im, nR, nC):
nR0 = len(im)# source number of rows
nC0 = len(im[0])# source number of columns return[[ im[int(nR0 * r / nR)][int(nC0 * c / nC)]for c in range(nC)]for r in range(nR)]
用法示例:将(30 x 30)图像调整为(100 x 200):
import matplotlib.pyplot as plt
def sqr(x):return x*x
def f(r, c, nR, nC):return1.0if sqr(c - nC/2)+ sqr(r - nR/2)< sqr(nC/4)else0.0# a red circle on a canvas of size (nR x nC)def circ(nR, nC):return[[[f(r, c, nR, nC),0,0]for c in range(nC)]for r in range(nR)]
plt.imshow(scale(circ(30,30),100,200))
If anyone came here looking for a simple method to scale/resize an image in Python, without using additional libraries, here’s a very simple image resize function:
#simple image scaling to (nR x nC) size
def scale(im, nR, nC):
nR0 = len(im) # source number of rows
nC0 = len(im[0]) # source number of columns
return [[ im[int(nR0 * r / nR)][int(nC0 * c / nC)]
for c in range(nC)] for r in range(nR)]
Example usage: resizing a (30 x 30) image to (100 x 200):
import matplotlib.pyplot as plt
def sqr(x):
return x*x
def f(r, c, nR, nC):
return 1.0 if sqr(c - nC/2) + sqr(r - nR/2) < sqr(nC/4) else 0.0
# a red circle on a canvas of size (nR x nC)
def circ(nR, nC):
return [[ [f(r, c, nR, nC), 0, 0]
for c in range(nC)] for r in range(nR)]
plt.imshow(scale(circ(30, 30), 100, 200))
Output:
This works to shrink/scale images, and works fine with numpy arrays.
回答 4
SciPy的imresize()方法是另一种调整大小的方法,但是将从SciPy v 1.3.0开始将其删除。SciPy指的是PIL图像调整大小方法:Image.resize(size, resample=0)
SciPy’s imresize() method was another resize method, but it will be removed starting with SciPy v 1.3.0 . SciPy refers to PIL image resize method: Image.resize(size, resample=0)
size – The requested size in pixels, as a 2-tuple: (width, height). resample – An optional resampling filter. This can be one of PIL.Image.NEAREST (use nearest neighbour), PIL.Image.BILINEAR (linear interpolation), PIL.Image.BICUBIC (cubic spline interpolation), or PIL.Image.LANCZOS (a high-quality downsampling filter). If omitted, or if the image has mode “1” or “P”, it is set PIL.Image.NEAREST.
Sure. You can do this without OpenCV, scikit-image or PIL.
Image resizing is basically mapping the coordinates of each pixel from the original image to its resized position.
Since the coordinates of an image must be integers (think of it as a matrix), if the mapped coordinate has decimal values, you should interpolate the pixel value to approximate it to the integer position (e.g. getting the nearest pixel to that position is known as Nearest neighbor interpolation).
All you need is a function that does this interpolation for you. SciPy has interpolate.interp2d.
You can use it to resize an image in numpy array, say arr, as follows:
I want to use OpenCV2.0 and Python2.6 to show resized images. I used and adopted this example but unfortunately, this code is for OpenCV2.1 and does not seem to be working on 2.0. Here my code:
import os, glob
import cv
ulpath = "exampleshq/"
for infile in glob.glob( os.path.join(ulpath, "*.jpg") ):
im = cv.LoadImage(infile)
thumbnail = cv.CreateMat(im.rows/10, im.cols/10, cv.CV_8UC3)
cv.Resize(im, thumbnail)
cv.NamedWindow(infile)
cv.ShowImage(infile, thumbnail)
cv.WaitKey(0)
cv.DestroyWindow(name)
Since I cannot use
cv.LoadImageM
I used
cv.LoadImage
instead, which was no problem in other applications. Nevertheless, cv.iplimage has no attribute rows, cols or size. Can anyone give me a hint, how to solve this problem?
dst = cv2.resize(src, None, fx = 2, fy = 2, interpolation = cv2.INTER_CUBIC),
where fx is the scaling factor along the horizontal axis and fy along the vertical axis.
To shrink an image, it will generally look best with INTER_AREA interpolation, whereas to enlarge an image, it will generally look best with INTER_CUBIC (slow) or INTER_LINEAR (faster but still looks OK).
Example shrink image to fit a max height/width (keeping aspect ratio)
import cv2
img = cv2.imread('YOUR_PATH_TO_IMG')
height, width = img.shape[:2]
max_height = 300
max_width = 300
# only shrink if img is bigger than required
if max_height < height or max_width < width:
# get scaling factor
scaling_factor = max_height / float(height)
if max_width/float(width) < scaling_factor:
scaling_factor = max_width / float(width)
# resize image
img = cv2.resize(img, None, fx=scaling_factor, fy=scaling_factor, interpolation=cv2.INTER_AREA)
cv2.imshow("Shrinked image", img)
key = cv2.waitKey()
You could use the GetSize function to get those information,
cv.GetSize(im)
would return a tuple with the width and height of the image.
You can also use im.depth and img.nChan to get some more information.
And to resize an image, I would use a slightly different process, with another image instead of a matrix. It is better to try to work with the same type of data:
# Resizes a image and maintains aspect ratiodef maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):# Grab the image size and initialize dimensions
dim =None(h, w)= image.shape[:2]# Return original image if no need to resizeif width isNoneand height isNone:return image
# We are resizing height if width is noneif width isNone:# Calculate the ratio of the height and construct the dimensions
r = height / float(h)
dim =(int(w * r), height)# We are resizing width if height is noneelse:# Calculate the ratio of the width and construct the dimensions
r = width / float(w)
dim =(width, int(h * r))# Return the resized imagereturn cv2.resize(image, dim, interpolation=inter)
Here’s a function to upscale or downscale an image by desired width or height while maintaining aspect ratio
# Resizes a image and maintains aspect ratio
def maintain_aspect_ratio_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
# Grab the image size and initialize dimensions
dim = None
(h, w) = image.shape[:2]
# Return original image if no need to resize
if width is None and height is None:
return image
# We are resizing height if width is none
if width is None:
# Calculate the ratio of the height and construct the dimensions
r = height / float(h)
dim = (int(w * r), height)
# We are resizing width if height is none
else:
# Calculate the ratio of the width and construct the dimensions
r = width / float(w)
dim = (width, int(h * r))
# Return the resized image
return cv2.resize(image, dim, interpolation=inter)