You can use os.listdir() to get the files in the source directory, os.path.isfile() to see if they are regular files (including symbolic links on *nix systems), and shutil.copy to do the copying.
The following code copies only the regular files from the source directory into the destination directory (I’m assuming you don’t want any sub-directories copied).
import os
import shutil
# Copy every regular file found directly inside `src` into `dest`.
# `src` and `dest` are directory paths that must be defined before this runs.
src_files = os.listdir(src)
for file_name in src_files:
    full_file_name = os.path.join(src, file_name)
    # isfile() is False for directories, so sub-directories are skipped.
    if os.path.isfile(full_file_name):
        shutil.copy(full_file_name, dest)
If you don’t want to copy the whole tree (with subdirs etc), use glob.glob("path/to/dir/*.*") to get a list of all the filenames, then loop over the list and use shutil.copy to copy each file. Note that the pattern *.* only matches names that contain a dot.
import glob

# Copy every entry of `source_dir` whose name contains a dot into `dest_dir`.
# `source_dir` and `dest_dir` must be defined before this runs.
for filename in glob.glob(os.path.join(source_dir, '*.*')):
    shutil.copy(filename, dest_dir)
import os
import shutil
os.chdir('C:\\')  # Make sure you add your source and destination path below
dir_src = "C:\\foooo\\"
dir_dst = "C:\\toooo\\"
# Copy every '.txt' file found directly inside dir_src into dir_dst.
for filename in os.listdir(dir_src):
    if filename.endswith('.txt'):
        # os.path.join copes with a source path that lacks a trailing separator.
        shutil.copy(os.path.join(dir_src, filename), dir_dst)
    # NOTE(review): the original indentation was lost; this print is assumed to
    # sit at loop level (listing every entry) -- confirm it was not meant to be
    # inside the `if`.
    print(filename)
import os
import shutil
def recursive_copy(src, dest):
    """Copy each file from src dir to dest dir, including sub-directories.

    Args:
        src: path of the directory to copy from.
        dest: path of the directory to copy into; it is expected to exist.

    Entries that are neither regular files nor directories are skipped.
    """
    for item in os.listdir(src):
        file_path = os.path.join(src, item)
        # if item is a file, copy it
        if os.path.isfile(file_path):
            shutil.copy(file_path, dest)
        # else if item is a folder, recurse
        elif os.path.isdir(file_path):
            new_dest = os.path.join(dest, item)
            # os.mkdir raises if the directory is already there, which would
            # break copying into a destination that was populated before.
            if not os.path.isdir(new_dest):
                os.mkdir(new_dest)
            recursive_copy(file_path, new_dest)
Here is another example of a recursive copy function that lets you copy the contents of the directory (including sub-directories) one file at a time, which I used to solve this problem.
import os
import shutil
def recursive_copy(src, dest):
    """Copy each file from src dir to dest dir, including sub-directories.

    Args:
        src: path of the directory to copy from.
        dest: path of the directory to copy into; it is expected to exist.

    Entries that are neither regular files nor directories are skipped.
    """
    for item in os.listdir(src):
        file_path = os.path.join(src, item)
        # if item is a file, copy it
        if os.path.isfile(file_path):
            shutil.copy(file_path, dest)
        # else if item is a folder, recurse
        elif os.path.isdir(file_path):
            new_dest = os.path.join(dest, item)
            # os.mkdir raises if the directory is already there, which would
            # break copying into a destination that was populated before.
            if not os.path.isdir(new_dest):
                os.mkdir(new_dest)
            recursive_copy(file_path, new_dest)
EDIT: If you can, definitely just use shutil.copytree(src, dest). This requires that the destination folder does not already exist, though. If you need to copy files into an existing folder, the above method works well!
def get_graph():
    """Load the adjacency lists stored in 'kargerMinCut.txt'.

    Every non-blank line holds whitespace-separated integers: the first one
    is a vertex label, the remaining ones are its neighbours.

    Returns:
        dict mapping each vertex label to the list of its neighbours.
    """
    G = {}
    # The context manager closes the file even when a line fails to parse.
    with open('kargerMinCut.txt') as f:
        for line in f:
            ints = [int(x) for x in line.split()]
            if not ints:
                continue  # a blank line would otherwise raise IndexError
            G[ints[0]] = ints[1:]
    return G
def get_edge(G):
    """Build the edge list of the adjacency mapping G.

    Args:
        G: dict mapping a vertex label to the list of its neighbours.

    Returns:
        A list of two-element lists [i, v]; an adjacency entry only yields
        an edge when v > i, so an edge listed under both endpoints appears
        once.
    """
    E = []
    # Walk the vertices actually present instead of a hard-coded 1..200.
    for i in sorted(G):
        for v in G[i]:
            if v > i:
                E.append([i, v])
    # Debug aid kept from the original: shows the identity of the result.
    print(id(E))
    return E
def karger(E, num_vertices=200):
    """Run one randomized edge-contraction pass and return the cut size found.

    Args:
        E: list of two-element lists [u, v], one per edge. It is modified in
            place: edges are removed from it and the endpoints stored in the
            inner lists are rewritten, so pass a deep copy when the original
            edge list must survive.
        num_vertices: number of vertices in the graph. Defaults to 200, the
            value that used to be hard-coded.

    Returns:
        The number of edges left once two merged vertices remain, or once
        there is no edge left to contract.
    """
    import random

    count = num_vertices
    # Also stop when E runs out: random.randint(0, -1) would raise.
    while count > 2 and E:
        v0, v1 = E.pop(random.randint(0, len(E) - 1))
        if v0 != v1:
            count -= 1
            # Merge v1 into v0 by relabelling every endpoint equal to v1.
            for e in E:
                if e[0] == v1:
                    e[0] = v0
                if e[1] == v1:
                    e[1] = v0
            # Drop self-loops with a single in-place rebuild instead of
            # repeated pop(i) calls.
            E[:] = [e for e in E if e[0] != e[1]]
    return len(E)
if __name__ == "__main__":
    import copy
    G = get_graph()
    results = []
    E0 = get_edge(G)
    print(E0[1:10])  ## this result is not equal to print2
    for k in range(1, 5):
        # NOTE(review): list(E0) is only a shallow copy -- the new outer list
        # still holds the very same inner [u, v] lists, which karger()
        # rewrites in place. copy.deepcopy(E0) would leave E0 untouched.
        E0_copy = list(E0)  ## I guess here E0_coypy is a deep copy of E0
        results.append(karger(E0_copy))
        #print "the result is %d" %min(results)
    print(E0[1:10])  ## this is print2
After I get E0 from 'get_edge', I make a copy of E0 by calling 'E0_copy = list(E0)'. I assumed E0_copy would be a deep copy of E0, and I pass E0_copy into 'karger(E)'.
But in the main function, why is the result of 'print E0[1:10]' before the for loop not the same as the result after the for loop?
Below is my code:
def get_graph():
    """Load the adjacency lists stored in 'kargerMinCut.txt'.

    Every non-blank line holds whitespace-separated integers: the first one
    is a vertex label, the remaining ones are its neighbours.

    Returns:
        dict mapping each vertex label to the list of its neighbours.
    """
    G = {}
    # The context manager closes the file even when a line fails to parse.
    with open('kargerMinCut.txt') as f:
        for line in f:
            ints = [int(x) for x in line.split()]
            if not ints:
                continue  # a blank line would otherwise raise IndexError
            G[ints[0]] = ints[1:]
    return G
def get_edge(G):
    """Build the edge list of the adjacency mapping G.

    Args:
        G: dict mapping a vertex label to the list of its neighbours.

    Returns:
        A list of two-element lists [i, v]; an adjacency entry only yields
        an edge when v > i, so an edge listed under both endpoints appears
        once.
    """
    E = []
    # Walk the vertices actually present instead of a hard-coded 1..200.
    for i in sorted(G):
        for v in G[i]:
            if v > i:
                E.append([i, v])
    # Debug aid kept from the original: shows the identity of the result.
    print(id(E))
    return E
def karger(E, num_vertices=200):
    """Run one randomized edge-contraction pass and return the cut size found.

    Args:
        E: list of two-element lists [u, v], one per edge. It is modified in
            place: edges are removed from it and the endpoints stored in the
            inner lists are rewritten, so pass a deep copy when the original
            edge list must survive.
        num_vertices: number of vertices in the graph. Defaults to 200, the
            value that used to be hard-coded.

    Returns:
        The number of edges left once two merged vertices remain, or once
        there is no edge left to contract.
    """
    import random

    count = num_vertices
    # Also stop when E runs out: random.randint(0, -1) would raise.
    while count > 2 and E:
        v0, v1 = E.pop(random.randint(0, len(E) - 1))
        if v0 != v1:
            count -= 1
            # Merge v1 into v0 by relabelling every endpoint equal to v1.
            for e in E:
                if e[0] == v1:
                    e[0] = v0
                if e[1] == v1:
                    e[1] = v0
            # Drop self-loops with a single in-place rebuild instead of
            # repeated pop(i) calls.
            E[:] = [e for e in E if e[0] != e[1]]
    return len(E)
if __name__ == "__main__":
    import copy
    G = get_graph()
    results = []
    E0 = get_edge(G)
    print(E0[1:10])  ## this result is not equal to print2
    for k in range(1, 5):
        # NOTE(review): list(E0) is only a shallow copy -- the new outer list
        # still holds the very same inner [u, v] lists, which karger()
        # rewrites in place. copy.deepcopy(E0) would leave E0 untouched.
        E0_copy = list(E0)  ## I guess here E0_coypy is a deep copy of E0
        results.append(karger(E0_copy))
        #print "the result is %d" %min(results)
    print(E0[1:10])  ## this is print2
deepcopy(x, memo=None, _nil=[])
Deep copy operation on arbitrary Python objects.
请参阅以下代码段-
>>> a = [[1, 2, 3], [4, 5, 6]]
>>> b = list(a)
>>> a
[[1, 2, 3], [4, 5, 6]]
>>> b
[[1, 2, 3], [4, 5, 6]]
>>> a[0][1] = 10
>>> a
[[1, 10, 3], [4, 5, 6]]
>>> b   # b changes too -> Not a deepcopy.
[[1, 10, 3], [4, 5, 6]]
现在看deepcopy操作
>>> import copy
>>> b = copy.deepcopy(a)
>>> a
[[1, 10, 3], [4, 5, 6]]
>>> b
[[1, 10, 3], [4, 5, 6]]
>>> a[0][1] = 9
>>> a
[[1, 9, 3], [4, 5, 6]]
>>> b   # b doesn't change -> Deep Copy
[[1, 10, 3], [4, 5, 6]]
from copy import deepcopy

a = [1, 2]
b = [a, a]  # there's only 1 object a
c = deepcopy(b)
# check the result
c[0] is a  # return False, a new object a' is created
c[0] is c[1]  # return True, c is [a',a'] not [a',a'']
I believe a lot of programmers have run into one or two interview problems where they are asked to deep copy a linked list, however this problem is harder than it sounds!
in python, there is a module called “copy” with two useful functions
import copy
copy.copy()
copy.deepcopy()
copy() is a shallow copy function, if the given argument is a compound data structure, for instance a list, then python will create another object of the same type (in this case, a new list) but for everything inside old list, only their reference is copied
# think of it like
newList = [elem for elem in oldlist]
Intuitively, we could assume that deepcopy() would follow the same paradigm, and the only difference is that for each elem we will recursively call deepcopy, (just like the answer of mbcoder)
but this is wrong!
deepcopy() actually preserve the graphical structure of the original compound data:
from copy import deepcopy

a = [1, 2]
b = [a, a]  # there's only 1 object a
c = deepcopy(b)
# check the result
c[0] is a  # return False, a new object a' is created
c[0] is c[1]  # return True, c is [a',a'] not [a',a'']
this is the tricky part, during the process of deepcopy() a hashtable(dictionary in python) is used to map:
“old_object ref onto new_object ref”, this prevent unnecessary duplicates and thus preserve the structure of the copied compound data
a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Slicing makes a shallow copy: a new list holding the same element objects.
b = a[:]
print(id(a))  # e.g. 20983280
print(id(b))  # e.g. 12967208 -- a different list object
a[2] = 20
print(a)  # [0, 1, 20, 3, 4, 5, 6, 7, 8, 9, 10]
print(b)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
If your list elements are immutable objects then you can use this, otherwise you have to use deepcopy from copy module.
You can also use slicing as the shortest way to copy a list (note that this is a shallow copy, not a deep one), like this.
a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Slicing makes a shallow copy: a new list holding the same element objects.
b = a[:]
print(id(a))  # e.g. 20983280
print(id(b))  # e.g. 12967208 -- a different list object
a[2] = 20
print(a)  # [0, 1, 20, 3, 4, 5, 6, 7, 8, 9, 10]
print(b)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Can somebody explain what exactly makes a difference between the copies? Is it something related to mutable & immutable objects? If so, can you please explain it to me?
Normal assignment operations will simply point the new variable towards the existing object. The docs explain the difference between shallow and deep copies:
The difference between shallow and deep copying is only relevant for
compound objects (objects that contain other objects, like lists or
class instances):
A shallow copy constructs a new compound object and then (to the extent possible) inserts references into it to the objects found in the original.
A deep copy constructs a new compound object and then, recursively, inserts copies into it of the objects found in the
original.
Here’s a little demonstration:
import copy
a = [1, 2, 3]
b = [4, 5, 6]
c = [a, b]
Using normal assignment operatings to copy:
d = c
print(id(c) == id(d))  # True - d is the same object as c
print(id(c[0]) == id(d[0]))  # True - d[0] is the same object as c[0]
Using a shallow copy:
d = copy.copy(c)
print(id(c) == id(d))  # False - d is now a new object
print(id(c[0]) == id(d[0]))  # True - d[0] is the same object as c[0]
Using a deep copy:
d = copy.deepcopy(c)
print(id(c) == id(d))  # False - d is now a new object
print(id(c[0]) == id(d[0]))  # False - d[0] is now a new object
For immutable objects, there is no need for copying because the data will never change, so Python uses the same data; ids are always the same. For mutable objects, since they can potentially change, [shallow] copy creates a new object.
Deep copy is related to nested structures. If you have list of lists, then deepcopy copies the nested lists also, so it is a recursive copy. With just copy, you have a new outer list, but inner lists are references.
Assignment does not copy. It simply sets the reference to the old data. So you need copy to create a new list with the same contents.
>>> import copy
>>> i = [1,2,3]
>>> j = copy.copy(i)
>>> hex(id(i)), hex(id(j))
>>> ('0x102b9b7c8', '0x102971cc8') #Both addresses are different
>>> i.append(4)
>>> j
>>> [1,2,3] #Updation of original list didn't affected copied variable
嵌套列表示例使用copy:
>>> import copy
>>> i = [1,2,3,[4,5]]
>>> j = copy.copy(i)
>>> hex(id(i)), hex(id(j))
>>> ('0x102b9b7c8', '0x102971cc8') #Both addresses are still different
>>> hex(id(i[3])), hex(id(j[3]))
>>> ('0x10296f908', '0x10296f908') #Nested lists have same address
>>> i[3].append(6)
>>> j
>>> [1,2,3,[4,5,6]] #Updation of original nested list updated the copy as well
平面清单示例使用deepcopy:
>>> import copy
>>> i = [1,2,3]
>>> j = copy.deepcopy(i)
>>> hex(id(i)), hex(id(j))
>>> ('0x102b9b7c8', '0x102971cc8') #Both addresses are different
>>> i.append(4)
>>> j
>>> [1,2,3] #Updation of original list didn't affected copied variable
嵌套列表示例使用deepcopy:
>>> import copy
>>> i = [1,2,3,[4,5]]
>>> j = copy.deepcopy(i)
>>> hex(id(i)), hex(id(j))
>>> ('0x102b9b7c8', '0x102971cc8') #Both addresses are still different
>>> hex(id(i[3])), hex(id(j[3]))
>>> ('0x10296f908', '0x102b9b7c8') #Nested lists have different addresses
>>> i[3].append(6)
>>> j
>>> [1,2,3,[4,5]] #Updation of original nested list didn't affected the copied variable
For immutable objects, creating a copy don’t make much sense since they are not going to change. For mutable objects assignment,copy and deepcopy behaves differently. Lets talk about each of them with examples.
An assignment operation simply assigns the reference of source to destination e.g:
>>> i = [1,2,3]
>>> j=i
>>> hex(id(i)), hex(id(j))
>>> ('0x10296f908', '0x10296f908') #Both addresses are identical
Now i and j technically refers to same list. Both i and j have same memory address. Any updation to either
of them will be reflected to the other. e.g:
>>> i.append(4)
>>> j
>>> [1,2,3,4] #Destination is updated
>>> j.append(5)
>>> i
>>> [1,2,3,4,5] #Source is updated
On the other hand copy and deepcopy creates a new copy of variable. So now changes to original variable will not be reflected
to the copy variable and vice versa. However copy(shallow copy), don’t creates a copy of nested objects, instead it just
copies the reference of nested objects. Deepcopy copies all the nested objects recursively.
Some examples to demonstrate behaviour of copy and deepcopy:
Flat list example using copy:
>>> import copy
>>> i = [1,2,3]
>>> j = copy.copy(i)
>>> hex(id(i)), hex(id(j))
>>> ('0x102b9b7c8', '0x102971cc8') #Both addresses are different
>>> i.append(4)
>>> j
>>> [1,2,3] #Updation of original list didn't affected copied variable
Nested list example using copy:
>>> import copy
>>> i = [1,2,3,[4,5]]
>>> j = copy.copy(i)
>>> hex(id(i)), hex(id(j))
>>> ('0x102b9b7c8', '0x102971cc8') #Both addresses are still different
>>> hex(id(i[3])), hex(id(j[3]))
>>> ('0x10296f908', '0x10296f908') #Nested lists have same address
>>> i[3].append(6)
>>> j
>>> [1,2,3,[4,5,6]] #Updation of original nested list updated the copy as well
Flat list example using deepcopy:
>>> import copy
>>> i = [1,2,3]
>>> j = copy.deepcopy(i)
>>> hex(id(i)), hex(id(j))
>>> ('0x102b9b7c8', '0x102971cc8') #Both addresses are different
>>> i.append(4)
>>> j
>>> [1,2,3] #Updation of original list didn't affected copied variable
Nested list example using deepcopy:
>>> import copy
>>> i = [1,2,3,[4,5]]
>>> j = copy.deepcopy(i)
>>> hex(id(i)), hex(id(j))
>>> ('0x102b9b7c8', '0x102971cc8') #Both addresses are still different
>>> hex(id(i[3])), hex(id(j[3]))
>>> ('0x10296f908', '0x102b9b7c8') #Nested lists have different addresses
>>> i[3].append(6)
>>> j
>>> [1,2,3,[4,5]] #Updation of original nested list didn't affected the copied variable
回答 3
让我们在一个图形示例中查看如何执行以下代码:
import copy
class Foo(object):
    """Empty placeholder class used to populate the list being copied."""

    def __init__(self):
        pass


a = [Foo(), Foo()]
shallow = copy.copy(a)  # new list, same Foo instances
deep = copy.deepcopy(a)  # new list holding new Foo instances
a, b, c, d, a1, b1, c1 and d1 are references to objects in memory, which are uniquely identified by their ids.
An assignment operation takes a reference to the object in memory and assigns that reference to a new name. c=[1,2,3,4] is an assignment that creates a new list object containing those four integers, and assigns the reference to that object to c. c1=c is an assignment that takes the same reference to the same object and assigns that to c1. Since the list is mutable, anything that happens to that list will be visible regardless of whether you access it through c or c1, because they both reference the same object.
c1=copy.copy(c) is a “shallow copy” that creates a new list and assigns the reference to the new list to c1. c still points to the original list. So, if you modify the list at c1, the list that c refers to will not change.
The concept of copying is irrelevant to immutable objects like integers and strings. Since you can’t modify those objects, there is never a need to have two copies of the same value in memory at different locations. So integers and strings, and some other objects to which the concept of copying does not apply, are simply reassigned. This is why your examples with a and b result in identical ids.
c1=copy.deepcopy(c) is a “deep copy”, but it functions the same as a shallow copy in this example. Deep copies differ from shallow copies in that shallow copies will make a new copy of the object itself, but any references inside that object will not themselves be copied. In your example, your list has only integers inside it (which are immutable), and as previously discussed there is no need to copy those. So the “deep” part of the deep copy does not apply. However, consider this more complex list:
e = [[1, 2],[4, 5, 6],[7, 8, 9]]
This is a list that contains other lists (you could also describe it as a two-dimensional array).
If you run a “shallow copy” on e, copying it to e1, you will find that the id of the list changes, but each copy of the list contains references to the same three lists — the lists with integers inside. That means that if you were to do e[0].append(3), then e would be [[1, 2, 3],[4, 5, 6],[7, 8, 9]]. But e1 would also be [[1, 2, 3],[4, 5, 6],[7, 8, 9]]. On the other hand, if you subsequently did e.append([10, 11, 12]), e would be [[1, 2, 3],[4, 5, 6],[7, 8, 9],[10, 11, 12]]. But e1 would still be [[1, 2, 3],[4, 5, 6],[7, 8, 9]]. That’s because the outer lists are separate objects that initially each contain three references to three inner lists. If you modify the inner lists, you can see those changes no matter if you are viewing them through one copy or the other. But if you modify one of the outer lists as above, then e contains three references to the original three lists plus one more reference to a new list. And e1 still only contains the original three references.
A ‘deep copy’ would not only duplicate the outer list, but it would also go inside the lists and duplicate the inner lists, so that the two resulting objects do not contain any of the same references (as far as mutable objects are concerned). If the inner lists had further lists (or other objects such as dictionaries) inside of them, they too would be duplicated. That’s the ‘deep’ part of the ‘deep copy’.
import copy
list1 = [['a', 'b', 'c'], ['d', 'e', ' f ']]  # assigning a list
list2 = copy.copy(list1)  # shallow copy is done using copy function of copy module
list1.append(['g', 'h', 'i'])  # appending another list to list1
print(list1)
list1 =[['a','b','c'],['d','e',' f '],['g','h','i']]
list2 =[['a','b','c'],['d','e',' f ']]
注意,list2仍然不受影响,但是如果我们对子对象进行更改,例如:
list1[0][0] = 'x'
那么list1和list2都将得到更改:
list1 =[['x','b','c'],['d','e',' f '],['g','h','i']]
list2 =[['x','b','c'],['d','e',' f ']]
import copy
list1 = [['a', 'b', 'c'], ['d', 'e', ' f ']]  # assigning a list
# The function is copy.deepcopy; `deepcopy.copy` is a NameError.
list2 = copy.deepcopy(list1)  # deep copy is done using deepcopy function of copy module
list1.append(['g', 'h', 'i'])  # appending another list to list1
print(list1)
list1 =[['a','b','c'],['d','e',' f '],['g','h','i']]
list2 =[['a','b','c'],['d','e',' f ']]
注意,list2仍然不受影响,但是如果我们对子对象进行更改,例如:
list1[0][0] = 'x'
那么list2也不受影响,因为所有子对象和父对象都指向不同的内存位置:
list1 =[['x','b','c'],['d','e',' f '],['g','h','i']]
list2 =[['a','b','c'],['d','e',' f ']]
In python, when we assign objects like list, tuples, dict, etc to another object usually with a ‘ = ‘ sign, python creates copy’s by reference. That is, let’s say we have a list of list like this :
then if we print list2 in python terminal we’ll get this :
list2 = [ [ 'a', 'b', 'c'] , [ 'd', 'e', ' f '] ]
Both list1 & list2 are pointing to same memory location, any change to any one them will result in changes visible in both objects, i.e both objects are pointing to same memory location.
If we change list1 like this :
Now coming to Shallow copy, when two objects are copied via shallow copy, the child object of both parent object refers to same memory location but any further new changes in any of the copied object will be independent to each other.
Let’s understand this with a small example. Suppose we have this small code snippet :
import copy
list1 = [['a', 'b', 'c'], ['d', 'e', ' f ']]  # assigning a list
list2 = copy.copy(list1)  # shallow copy is done using copy function of copy module
list1.append(['g', 'h', 'i'])  # appending another list to list1
print(list1)
list1 = [ [ 'a', 'b', 'c'] , [ 'd', 'e', ' f '] , [ 'g', 'h', 'i'] ]
list2 = [ [ 'a', 'b', 'c'] , [ 'd', 'e', ' f '] ]
notice, list2 remains unaffected, but if we make changes to child objects like :
Now, Deep copy helps in creating completely isolated objects out of each other. If two objects are copied via Deep Copy then both parent & it’s child will be pointing to different memory location.
Example :
import copy
list1 = [['a', 'b', 'c'], ['d', 'e', ' f ']]  # assigning a list
# The function is copy.deepcopy; `deepcopy.copy` is a NameError.
list2 = copy.deepcopy(list1)  # deep copy is done using deepcopy function of copy module
list1.append(['g', 'h', 'i'])  # appending another list to list1
print(list1)
list1 = [ [ 'a', 'b', 'c'] , [ 'd', 'e', ' f '] , [ 'g', 'h', 'i'] ]
list2 = [ [ 'a', 'b', 'c'] , [ 'd', 'e', ' f '] ]
notice, list2 remains unaffected, but if we make changes to child objects like :
list1[0][0] = 'x'
then also list2 will be unaffected as all the child objects and parent object points to different memory location :
Below code demonstrates the difference between assignment, shallow copy using the copy method, shallow copy using the (slice) [:] and the deepcopy. Below example uses nested lists there by making the differences more evident.
The GIST to take is this:
Dealing with shallow lists (no sub_lists, just single elements) using “normal assignment” rises a “side effect” when you create a shallow list and then you create a copy of this list using “normal assignment”. This “side effect” is when you change any element of the copy list created, because it will automatically change the same elements of the original list. That is when copy comes in handy, as it won’t change the original list elements when changing the copy elements.
On the other hand, copy does have a “side effect” as well, when you have a list that has lists in it (sub_lists), and deepcopy solves it. For instance if you create a big list that has nested lists in it (sub_lists), and you create a copy of this big list (the original list). The “side effect” would arise when you modify the sub_lists of the copy list which would automatically modify the sub_lists of the big list. Sometimes (in some projects) you want to keep the big list (your original list) as it is without modification, and all you want is to make a copy of its elements (sub_lists). For that, your solution is to use deepcopy which will take care of this “side effect” and makes a copy without modifying the original content.
The different behaviors of copy and deep copy operations concerns only compound objects (ie: objects that contain other objects such as lists).
Here are the differences illustrated in this simple code example:
First
let’s check how copy (shallow) behaves, by creating an original list and a copy of this list:
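Not sure if it was mentioned above or not, but it’s very important to understand that .copy() creates a new outer object whose elements are references to the objects inside the original. If you change one of those shared nested objects through the copy, you change it in the original object as well.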
.deepcopy() creates new object and does real copying of original object to new one. Changing new deepcopied object doesn’t affect original object.
And yes, .deepcopy() copies original object recursively, while .copy() create a reference object to first-level data of original object.
So the copying/referencing difference between .copy() and .deepcopy() is significant.
Deep copy is related to nested structures. If you have list of lists, then deepcopy copies the nested lists also, so it is a recursive copy. With just copy, you have a new outer list, but inner lists are references. Assignment does not copy.
For Ex
[[0, 1, 2, 3, 3], 4, 5]
[[0, 1, 2, 3, 3], 4, 5, 3]
Copy method copy content of outer list to new list but inner list is still same for both list so if you make changes in inner list of any lists it will affects both list.
But if you use Deep copy then it will create new instance for inner list too.
>>> lst = [1, 2, 3, 4, 5]
>>> a = lst
>>> b = lst[:]
>>> b
[1, 2, 3, 4, 5]
>>> a
[1, 2, 3, 4, 5]
>>> lst is b
False
>>> lst is a
True
>>> id(lst)
46263192
>>> id(a)
46263192 ------> See here the id of a and the id of lst are the same, so a is just another name for the same list (plain assignment, not a copy), and the boolean answer is True
>>> id(b)
46263512 ------> See here the id of b and the id of lst are not the same, so b is a new list (a shallow copy made by slicing), and the boolean answer is False although the output looks the same.
>>lst=[1,2,3,4,5]
>>a=lst
>>b=lst[:]
>>> b
[1, 2, 3, 4, 5]
>>> a
[1, 2, 3, 4, 5]
>>> lst is b
False
>>> lst is a
True
>>> id(lst)
46263192
>>> id(a)
46263192 ------> See here the id of a and the id of lst are the same, so a is just another name for the same list (plain assignment, not a copy), and the boolean answer is True
>>> id(b)
46263512 ------> See here the id of b and the id of lst are not the same, so b is a new list (a shallow copy made by slicing), and the boolean answer is False although the output looks the same.
I would like to create a copy of an object. I want the new object to possess all properties of the old object (values of the fields). But I want to have independent objects. So, if I change values of the fields of the new object, the old object should not be affected by that.
To get a fully independent copy of an object you can use the copy.deepcopy() function.
For more details about shallow and deep copying please refer to the other answers to this question and the nice explanation in this answer to a related question.
As we can see, when the interior object of the copy is mutated, the original does not change.
Custom Objects
Custom objects usually store data in a __dict__ attribute or in __slots__ (a tuple-like memory structure.)
To make a copyable object, define __copy__ (for shallow copies) and/or __deepcopy__ (for deep copies).
from copy import copy, deepcopy
class Copyable:
    """Example class that customises copy.copy() and copy.deepcopy().

    `a` is stored in a slot, while `b` ends up in the instance __dict__
    (which is itself listed in __slots__).
    """
    __slots__ = 'a', '__dict__'

    def __init__(self, a, b):
        self.a, self.b = a, b

    def __copy__(self):
        """Return a new instance that shares this instance's a and b."""
        return type(self)(self.a, self.b)

    def __deepcopy__(self, memo):  # memo is a dict of id's to copies
        """Return a new instance holding deep copies of a and b.

        Args:
            memo: dict mapping id(original) to the copy already made for it.
        """
        id_self = id(self)  # memoization avoids unnecessary recursion
        _copy = memo.get(id_self)
        if _copy is None:
            # Register the new instance BEFORE copying the attributes, so an
            # attribute that refers back to self finds it in memo instead of
            # recursing without end.
            _copy = type(self).__new__(type(self))
            memo[id_self] = _copy
            # Still initialise through __init__, as the constructor call did.
            _copy.__init__(
                deepcopy(self.a, memo),
                deepcopy(self.b, memo))
        return _copy
Note that deepcopy keeps a memoization dictionary of id(original) (or identity numbers) to copies. To enjoy good behavior with recursive data structures, make sure you haven’t already made a copy, and if you have, return that.
I believe the following should work with many well-behaved classed in Python:
def copy(obj):
    """Return a copy of obj made by passing it to its own type's constructor.

    This only works for types whose constructor accepts an instance of the
    same type (for example list, dict, set, tuple, str). Nested objects are
    shared with the original, i.e. the copy is shallow.
    """
    return type(obj)(obj)
(Of course, I am not talking here about “deep copies,” which is a different story, and which may be not a very clear concept — how deep is deep enough?)
According to my tests with Python 3, for immutable objects, like tuples or strings, it returns the same object (because there is no need to make a shallow copy of an immutable object), but for lists or dictionaries it creates an independent shallow copy.
Of course this method only works for classes whose constructors behave accordingly. Possible use cases: making a shallow copy of a standard Python container class.
sorted() returns a new sorted list, leaving the original list unaffected. list.sort() sorts the list in-place, mutating the list indices, and returns None (like all in-place operations).
sorted() works on any iterable, not just lists. Strings, tuples, dictionaries (you’ll get the keys), generators, etc., returning a list containing all elements, sorted.
Use list.sort() when you want to mutate the list, sorted() when you want a new sorted object back. Use sorted() when you want to sort something that is an iterable, not a list yet.
For lists, list.sort() is faster than sorted() because it doesn’t have to create a copy. For any other iterable, you have no choice.
No, you cannot retrieve the original positions. Once you called list.sort() the original order is gone.
import timeit
setup ="""
import random
lists = [list(range(10000)) for _ in range(1000)] # list of lists
for l in lists:
random.shuffle(l) # shuffle each list
shuffled_iter = iter(lists) # wrap as iterator so next() yields one at a time
"""
>>> timeit.repeat("next(shuffled_iter).sort()", setup=setup, number = 1000)
[3.75168503401801, 3.7473005310166627, 3.753129180986434]
>>> timeit.repeat("sorted(next(shuffled_iter))", setup=setup, number = 1000)
[3.702025591977872, 3.709248117986135, 3.71071034099441]
Python 3
>>> timeit.repeat("next(shuffled_iter).sort()", setup=setup, number = 1000)
[2.797430992126465, 2.796825885772705, 2.7744789123535156]
>>> timeit.repeat("sorted(next(shuffled_iter))", setup=setup, number = 1000)
[2.675589084625244, 2.8019039630889893, 2.849375009536743]
>>> timeit.repeat("lst[:].sort()", setup=setup, number = 10000)
[572.919036605, 573.1384446719999, 568.5923951]
>>> timeit.repeat("sorted(lst[:])", setup=setup, number = 10000)
[647.0584738299999, 653.4040515829997, 657.9457361929999]
What is the difference between sorted(list) vs list.sort()?
list.sort mutates the list in-place & returns None
sorted takes any iterable & returns a new list, sorted.
sorted is equivalent to this Python implementation, but the CPython builtin function should run measurably faster as it is written in C:
def sorted(iterable, key=None, reverse=False):
    """Return a new sorted list built from the items of iterable.

    Pure-Python illustration of the builtin of the same name.

    Args:
        iterable: any iterable; it is not modified.
        key: optional one-argument function giving each item's sort key.
        reverse: when true, sort in descending order.

    Returns:
        A new list containing every item of iterable, sorted.
    """
    new_list = list(iterable)    # make a new list
    new_list.sort(key=key, reverse=reverse)  # sort it
    return new_list              # return it
when to use which?
Use list.sort when you do not wish to retain the original sort order
(Thus you will be able to reuse the list in-place in memory.) and when
you are the sole owner of the list (if the list is shared by other code
and you mutate it, you could introduce bugs where that list is used.)
Use sorted when you want to retain the original sort order or when you
wish to create a new list that only your local code owns.
Can a list’s original positions be retrieved after list.sort()?
No – unless you made a copy yourself, that information is lost because the sort is done in-place.
“And which is faster? And how much faster?”
To illustrate the penalty of creating a new list, use the timeit module, here’s our setup:
import timeit
setup = """
import random
lists = [list(range(10000)) for _ in range(1000)] # list of lists
for l in lists:
random.shuffle(l) # shuffle each list
shuffled_iter = iter(lists) # wrap as iterator so next() yields one at a time
"""
And here’s our results for a list of randomly arranged 10000 integers, as we can see here, we’ve disproven an older list creation expense myth:
Python 2.7
>>> timeit.repeat("next(shuffled_iter).sort()", setup=setup, number = 1000)
[3.75168503401801, 3.7473005310166627, 3.753129180986434]
>>> timeit.repeat("sorted(next(shuffled_iter))", setup=setup, number = 1000)
[3.702025591977872, 3.709248117986135, 3.71071034099441]
Python 3
>>> timeit.repeat("next(shuffled_iter).sort()", setup=setup, number = 1000)
[2.797430992126465, 2.796825885772705, 2.7744789123535156]
>>> timeit.repeat("sorted(next(shuffled_iter))", setup=setup, number = 1000)
[2.675589084625244, 2.8019039630889893, 2.849375009536743]
After some feedback, I decided another test would be desirable with different characteristics. Here I provide the same randomly ordered list of 100,000 in length for each iteration 1,000 times.
I interpret this larger sort’s difference coming from the copying mentioned by Martijn, but it does not dominate to the point stated in the older more popular answer here, here the increase in time is only about 10%
>>> timeit.repeat("lst[:].sort()", setup=setup, number = 10000)
[572.919036605, 573.1384446719999, 568.5923951]
>>> timeit.repeat("sorted(lst[:])", setup=setup, number = 10000)
[647.0584738299999, 653.4040515829997, 657.9457361929999]
I also ran the above on a much smaller sort, and saw that the new sorted copy version still takes about 2% longer running time on a sort of 1000 length.
Poke ran his own code as well, here’s the code:
import timeit

# Benchmark: sort a fresh copy of the same shuffled list on every run, once
# in place with list.sort() and once with sorted().
setup = '''
import random
random.seed(12122353453462456)
lst = list(range({length}))
random.shuffle(lst)
lists = [lst[:] for _ in range({repeats})]
it = iter(lists)
'''
t1 = 'l = next(it); l.sort()'
t2 = 'l = next(it); sorted(l)'
length = 10 ** 7
repeats = 10 ** 2
print(length, repeats)
for t in t1, t2:
    print(t)
    # `timeit` is the module here, so the function is timeit.timeit.
    print(timeit.timeit(t, setup=setup.format(length=length, repeats=repeats), number=repeats))
He found a similar result for a sort of length 10,000,000 (run 100 times), but only about a 5% increase in time; here’s the output:
10000000 100
l = next(it); l.sort()
610.5015971539542
l = next(it); sorted(l)
646.7786222379655
Conclusion:
A large sized list being sorted with sorted making a copy will likely dominate differences, but the sorting itself dominates the operation, and organizing your code around these differences would be premature optimization. I would use sorted when I need a new sorted list of the data, and I would use list.sort when I need to sort a list in-place, and let that determine my usage.
回答 2
主要区别是sorted(some_list)返回一个新的list:
a =[3,2,1]print sorted(a)# new listprint a # is not modified
和some_list.sort(),将清单排序到位:
a =[3,2,1]print a.sort()# in placeprint a # it's modified
The .sort() method stores the sorted result directly in the list itself, so the answer to your third question is no.
If you use sorted(list) instead, you can still get the original order back, because the result is not stored in the original list. Note also that the .sort() method accepts arguments (such as key and reverse).
You have to store the value of sorted(list) in a variable explicitly.
For short data the speed difference will not matter; for long lists you should use the .sort() method directly for faster work, but again, the operation is irreversible.
While reading up the documentation for dict.copy(), it says that it makes a shallow copy of the dictionary. Same goes for the book I am following (Beazley’s Python Reference), which says:
The m.copy() method makes a shallow
copy of the items contained in a
mapping object and places them in a
new mapping object.
Consider this:
>>> original = dict(a=1, b=2)
>>> new = original.copy()
>>> new.update({'c': 3})
>>> original
{'a': 1, 'b': 2}
>>> new
{'a': 1, 'c': 3, 'b': 2}
So I assumed this would update the value of original (and add ‘c’: 3) also since I was doing a shallow copy. Like if you do it for a list:
>>> original = [1, 2, 3]
>>> new = original
>>> new.append(4)
>>> new, original
([1, 2, 3, 4], [1, 2, 3, 4])
This works as expected.
Since both are shallow copies, why is that the dict.copy() doesn’t work as I expect it to? Or my understanding of shallow vs deep copying is flawed?
回答 0
“浅复制”表示字典的内容不是按值复制,而只是创建一个新引用。
>>> a ={1:[1,2,3]}>>> b = a.copy()>>> a, b
({1:[1,2,3]},{1:[1,2,3]})>>> a[1].append(4)>>> a, b
({1:[1,2,3,4]},{1:[1,2,3,4]})
相反,深层副本将按值复制所有内容。
>>>import copy
>>> c = copy.deepcopy(a)>>> a, c
({1:[1,2,3,4]},{1:[1,2,3,4]})>>> a[1].append(5)>>> a, c
({1:[1,2,3,4,5]},{1:[1,2,3,4]})
It’s not a matter of deep copy or shallow copy, none of what you’re doing is deep copy.
Here:
>>> new = original
you’re creating a new reference to the list/dict referenced by original.
while here:
>>> new = original.copy()
>>> # or
>>> new = list(original) # dict(original)
you’re creating a new list/dict which is filled with a copy of the references of objects contained in the original container.
回答 2
举个例子:
original = dict(a=1, b=2, c=dict(d=4, e=5))
new = original.copy()
现在,让我们在“浅”(第一)级别中更改一个值:
new['a']=10# new = {'a': 10, 'b': 2, 'c': {'d': 4, 'e': 5}}# original = {'a': 1, 'b': 2, 'c': {'d': 4, 'e': 5}}# no change in original, since ['a'] is an immutable integer
现在让我们将值更深一级地更改:
new['c']['d']=40# new = {'a': 10, 'b': 2, 'c': {'d': 40, 'e': 5}}# original = {'a': 1, 'b': 2, 'c': {'d': 40, 'e': 5}}# new['c'] points to the same original['d'] mutable dictionary, so it will be changed
original = dict(a=1, b=2, c=dict(d=4, e=5))
new = original.copy()
Now let’s change a value in the ‘shallow’ (first) level:
new['a'] = 10
# new = {'a': 10, 'b': 2, 'c': {'d': 4, 'e': 5}}
# original = {'a': 1, 'b': 2, 'c': {'d': 4, 'e': 5}}
# no change in original, since assigning new['a'] rebinds the key in the copy only
Now let’s change a value one level deeper:
new['c']['d'] = 40
# new = {'a': 10, 'b': 2, 'c': {'d': 40, 'e': 5}}
# original = {'a': 1, 'b': 2, 'c': {'d': 40, 'e': 5}}
# new['c'] points to the same mutable dictionary as original['c'], so it will be changed
Adding to kennytm’s answer: when you make a shallow copy with parent.copy(), a new dictionary is created with the same keys, but the values are not copied; they are referenced. If you add a new value to parent_copy, it won’t affect parent, because parent_copy is a new dictionary, not a reference.
The hash(id) value of parent[1], parent_copy[1] are identical which implies [1,2,3] of parent[1] and parent_copy[1] stored at id 140690938288400.
But hash of parent and parent_copy are different which implies
They are different dictionaries and parent_copy is a new dictionary having values reference to values of parent
So if the original dict contains a list or another dictionary, modifying one of them in the original or in its shallow copy will modify it (the list or the dict) in the other.
shutil has many methods you can use. One of which is:
from shutil import copyfile
copyfile(src, dst)
Copy the contents of the file named src to a file named dst.
The destination location must be writable; otherwise, an IOError exception will be raised.
If dst already exists, it will be replaced.
Special files such as character or block devices and pipes cannot be copied with this function.
With copy, src and dst are path names given as strings.
If you use os.path operations, use copy rather than copyfile. copyfile will only accept strings.
回答 1
┌──────────────────┬────────┬───────────┬───────┬────────────────┐
│ Function │ Copies │ Copies │Can use│ Destination │
│ │metadata│permissions│buffer │may be directory│
├──────────────────┼────────┼───────────┼───────┼────────────────┤
│shutil.copy │ No │ Yes │ No │ Yes │
│shutil.copyfile │ No │ No │ No │ No │
│shutil.copy2 │ Yes │ Yes │ No │ Yes │
│shutil.copyfileobj│ No │ No │ Yes │ No │
└──────────────────┴────────┴───────────┴───────┴────────────────┘
┌──────────────────┬────────┬───────────┬───────┬────────────────┐
│ Function │ Copies │ Copies │Can use│ Destination │
│ │metadata│permissions│buffer │may be directory│
├──────────────────┼────────┼───────────┼───────┼────────────────┤
│shutil.copy │ No │ Yes │ No │ Yes │
│shutil.copyfile │ No │ No │ No │ No │
│shutil.copy2 │ Yes │ Yes │ No │ Yes │
│shutil.copyfileobj│ No │ No │ Yes │ No │
└──────────────────┴────────┴───────────┴───────┴────────────────┘
os.popen(cmd[, mode[, bufsize]])# example# In Unix/Linux
os.popen('cp source.txt destination.txt')# In Windows
os.popen('copy source.txt destination.txt')
subprocess.call(args,*, stdin=None, stdout=None, stderr=None, shell=False)# example (WARNING: setting `shell=True` might be a security-risk)# In Linux/Unix
status = subprocess.call('cp source.txt destination.txt', shell=True)# In Windows
status = subprocess.call('copy source.txt destination.txt', shell=True)
subprocess.check_output(args,*, stdin=None, stderr=None, shell=False, universal_newlines=False)# example (WARNING: setting `shell=True` might be a security-risk)# In Linux/Unix
status = subprocess.check_output('cp source.txt destination.txt', shell=True)# In Windows
status = subprocess.check_output('copy source.txt destination.txt', shell=True)
os.popen(cmd[, mode[, bufsize]])
# example
# In Unix/Linux
os.popen('cp source.txt destination.txt')
# In Windows
os.popen('copy source.txt destination.txt')
subprocess.call(args, *, stdin=None, stdout=None, stderr=None, shell=False)
# example (WARNING: setting `shell=True` might be a security-risk)
# In Linux/Unix
status = subprocess.call('cp source.txt destination.txt', shell=True)
# In Windows
status = subprocess.call('copy source.txt destination.txt', shell=True)
subprocess.check_output(args, *, stdin=None, stderr=None, shell=False, universal_newlines=False)
# example (WARNING: setting `shell=True` might be a security-risk)
# In Linux/Unix
status = subprocess.check_output('cp source.txt destination.txt', shell=True)
# In Windows
status = subprocess.check_output('copy source.txt destination.txt', shell=True)
def copyfileobj_example(source, dest, buffer_size=1024*1024):"""
Copy a file from source to dest. source and dest
must be file-like objects, i.e. any object with a read or
write method, like for example StringIO.
"""whileTrue:
copy_buffer = source.read(buffer_size)ifnot copy_buffer:break
dest.write(copy_buffer)
如果要按文件名复制,可以执行以下操作:
def copyfile_example(source, dest):# Beware, this example does not handle any edge cases!with open(source,'rb')as src, open(dest,'wb')as dst:
copyfileobj_example(src, dst)
Copying a file is a relatively straightforward operation as shown by the examples below, but you should instead use the shutil stdlib module for that.
def copyfileobj_example(source, dest, buffer_size=1024*1024):
    """
    Copy a file from source to dest. source and dest
    must be file-like objects, i.e. any object with a read or
    write method, like for example StringIO.

    The data is moved in chunks of at most buffer_size units per read,
    so the whole content is never held in memory at once. Copying starts
    at the current position of source and ends at the first empty read.
    Neither object is closed, and nothing is returned.
    """
    while True:
        copy_buffer = source.read(buffer_size)
        # An empty chunk means the source is exhausted.
        if not copy_buffer:
            break
        dest.write(copy_buffer)
If you want to copy by filename you could do something like this:
def copyfile_example(source, dest):
    """
    Copy the file at path source to the path dest.

    Both files are opened in binary mode ('rb' / 'wb'), so dest is
    created or truncated, and the data is transferred with
    copyfileobj_example. Both files are closed when the copy finishes
    or fails.
    """
    # Beware, this example does not handle any edge cases!
    with open(source, 'rb') as src, open(dest, 'wb') as dst:
        copyfileobj_example(src, dst)
Copy the contents of the file named src to a file named dst. The destination location must be writable; otherwise, an IOError exception will be raised. If dst already exists, it will be replaced. Special files such as character or block devices and pipes cannot be copied with this function. src and dst are path names given as strings.
Take a look at filesys for all the file and directory handling functions available in standard Python modules.
shutil.copyfileobj(fsrc, fdst[, length]) manipulate opened objects
In [3]: src = '~/Documents/Head+First+SQL.pdf'
In [4]: dst = '~/desktop'
In [5]: shutil.copyfileobj(src, dst)
AttributeError: 'str' object has no attribute 'read'
#copy the file object
In [7]: with open(src, 'rb') as f1,open(os.path.join(dst,'test.pdf'), 'wb') as f2:
...: shutil.copyfileobj(f1, f2)
In [8]: os.stat(os.path.join(dst,'test.pdf'))
Out[8]: os.stat_result(st_mode=33188, st_ino=8598319475, st_dev=16777220, st_nlink=1, st_uid=501, st_gid=20, st_size=13507926, st_atime=1516067347, st_mtime=1516067335, st_ctime=1516067345)
shutil.copyfile(src, dst, *, follow_symlinks=True) Copy and rename
In [9]: shutil.copyfile(src, dst)
IsADirectoryError: [Errno 21] Is a directory: '~/desktop'
#so dst should be a filename instead of a directory name
For small files and using only python built-ins, you can use the following one-liner:
with open(source, 'rb') as src, open(dest, 'wb') as dst: dst.write(src.read())
As @maxschlepzig mentioned in the comments below, this is not optimal way for applications where the file is too large or when memory is critical, thus Swati’s answer should be preferred.
For large files, what I did was read the file line by line and read each line into an array. Then, once the array reached a certain size, append it to a new file.
# Collect lines in a buffer and write them out in batches of one million.
# NOTE(review): `output` is assumed to be an already-open, writable file
# object created by the surrounding code -- confirm.
lines = []  # avoid naming the buffer `list`, which shadows the builtin
with open("file.txt", "r") as source:
    for line in source:
        lines.append(line)
        if len(lines) == 1000000:
            output.writelines(lines)
            del lines[:]
# Write whatever is left over after the last full batch; without this the
# tail of the file would be lost.
if lines:
    output.writelines(lines)
    del lines[:]
回答 12
from subprocess import call
call("cp -p <file> <file>", shell=True)
import copy
classFoo(object):def __init__(self, val):
self.val = val
def __repr__(self):return'Foo({!r})'.format(self.val)
foo =Foo(1)
a =['foo', foo]
b = a.copy()
c = a[:]
d = list(a)
e = copy.copy(a)
f = copy.deepcopy(a)# edit orignal list and instance
a.append('baz')
foo.val =5print('original: %r\nlist.copy(): %r\nslice: %r\nlist(): %r\ncopy: %r\ndeepcopy: %r'%(a, b, c, d, e, f))
With new_list = my_list, you don’t actually have two lists. The assignment just copies the reference to the list, not the actual list, so both new_list and my_list refer to the same list after the assignment.
To actually copy the list, you have various possibilities:
You can use the builtin list.copy() method (available since Python 3.3):
new_list = old_list.copy()
You can slice it:
new_list = old_list[:]
Alex Martelli’s opinion (at least back in 2007) about this is, that it is a weird syntax and it does not make sense to use it ever. ;) (In his opinion, the next one is more readable).
from copy import deepcopy
class old_class:def __init__(self):
self.blah ='blah'class new_class(object):def __init__(self):
self.blah ='blah'
dignore ={str:None, unicode:None, int:None, type(None):None}defCopy(obj, use_deepcopy=True):
t = type(obj)if t in(list, tuple):if t == tuple:# Convert to a list if a tuple to # allow assigning to when copying
is_tuple =True
obj = list(obj)else:# Otherwise just do a quick slice copy
obj = obj[:]
is_tuple =False# Copy each item recursivelyfor x in xrange(len(obj)):if type(obj[x])in dignore:continue
obj[x]=Copy(obj[x], use_deepcopy)if is_tuple:# Convert back into a tuple again
obj = tuple(obj)elif t == dict:# Use the fast shallow dict copy() method and copy any # values which aren't immutable (like lists, dicts etc)
obj = obj.copy()for k in obj:if type(obj[k])in dignore:continue
obj[k]=Copy(obj[k], use_deepcopy)elif t in dignore:# Numeric or string/unicode? # It's immutable, so ignore it!passelif use_deepcopy:
obj = deepcopy(obj)return obj
if __name__ =='__main__':import copy
from time import time
num_times =100000
L =[None,'blah',1,543.4532,['foo'],('bar',),{'blah':'blah'},
old_class(), new_class()]
t = time()for i in xrange(num_times):Copy(L)print'Custom Copy:', time()-t
t = time()for i in xrange(num_times):Copy(L, use_deepcopy=False)print'Custom Copy Only Copying Lists/Tuples/Dicts (no classes):', time()-t
t = time()for i in xrange(num_times):
copy.copy(L)print'copy.copy:', time()-t
t = time()for i in xrange(num_times):
copy.deepcopy(L)print'copy.deepcopy:', time()-t
t = time()for i in xrange(num_times):
L[:]print'list slicing [:]:', time()-t
t = time()for i in xrange(num_times):
list(L)print'list(L):', time()-t
t = time()for i in xrange(num_times):[i for i in L]print'list expression(L):', time()-t
t = time()for i in xrange(num_times):
a =[]
a.extend(L)print'list extend:', time()-t
t = time()for i in xrange(num_times):
a =[]for y in L:
a.append(y)print'list append:', time()-t
t = time()for i in xrange(num_times):
a =[]
a.extend(i for i in L)print'generator expression extend:', time()-t
So the fastest is list slicing. But be aware that copy.copy(), list[:] and list(list), unlike copy.deepcopy() and the python version don’t copy any lists, dictionaries and class instances in the list, so if the originals change, they will change in the copied list too and vice versa.
(Here’s the script if anyone’s interested or wants to raise any issues:)
from copy import deepcopy
class old_class:
    """Sample class declared without an explicit base (a classic class under Python 2)."""

    def __init__(self):
        self.blah = 'blah'
class new_class(object):
    """Sample class that explicitly derives from object (a new-style class)."""

    def __init__(self):
        self.blah = 'blah'
dignore = {str: None, unicode: None, int: None, type(None): None}
def Copy(obj, use_deepcopy=True):
    """
    Return a copy of obj, recursing into lists, tuples and dicts.

    The dispatch is on the exact type of obj (type(obj), not isinstance),
    so subclasses of list/tuple/dict are not treated as containers here.

    * list / tuple: a new container is built and every item whose type is
      not a key of the module-level ``dignore`` mapping is copied
      recursively. A tuple is converted to a list while copying and back
      to a tuple at the end.
    * dict: a shallow ``dict.copy()`` is taken and every value whose type
      is not in ``dignore`` is copied recursively; keys are reused.
    * a type listed in ``dignore``: obj itself is returned.
    * anything else: ``deepcopy(obj)`` when use_deepcopy is true,
      otherwise obj itself is returned uncopied.
    """
    t = type(obj)

    if t in (list, tuple):
        if t == tuple:
            # Convert to a list if a tuple to
            # allow assigning to when copying
            is_tuple = True
            obj = list(obj)
        else:
            # Otherwise just do a quick slice copy
            obj = obj[:]
            is_tuple = False

        # Copy each item recursively. Assigning to the current index while
        # enumerating is safe because the length of the list never changes.
        for x, item in enumerate(obj):
            if type(item) in dignore:
                continue
            obj[x] = Copy(item, use_deepcopy)

        if is_tuple:
            # Convert back into a tuple again
            obj = tuple(obj)

    elif t == dict:
        # Use the fast shallow dict copy() method and copy any
        # values which aren't immutable (like lists, dicts etc)
        obj = obj.copy()
        for k in obj:
            if type(obj[k]) in dignore:
                continue
            obj[k] = Copy(obj[k], use_deepcopy)

    elif t in dignore:
        # Numeric or string/unicode?
        # It's immutable, so ignore it!
        pass

    elif use_deepcopy:
        obj = deepcopy(obj)

    return obj
if __name__ == '__main__':
import copy
from time import time
num_times = 100000
L = [None, 'blah', 1, 543.4532,
['foo'], ('bar',), {'blah': 'blah'},
old_class(), new_class()]
t = time()
for i in xrange(num_times):
Copy(L)
print 'Custom Copy:', time()-t
t = time()
for i in xrange(num_times):
Copy(L, use_deepcopy=False)
print 'Custom Copy Only Copying Lists/Tuples/Dicts (no classes):', time()-t
t = time()
for i in xrange(num_times):
copy.copy(L)
print 'copy.copy:', time()-t
t = time()
for i in xrange(num_times):
copy.deepcopy(L)
print 'copy.deepcopy:', time()-t
t = time()
for i in xrange(num_times):
L[:]
print 'list slicing [:]:', time()-t
t = time()
for i in xrange(num_times):
list(L)
print 'list(L):', time()-t
t = time()
for i in xrange(num_times):
[i for i in L]
print 'list expression(L):', time()-t
t = time()
for i in xrange(num_times):
a = []
a.extend(L)
print 'list extend:', time()-t
t = time()
for i in xrange(num_times):
a = []
for y in L:
a.append(y)
print 'list append:', time()-t
t = time()
for i in xrange(num_times):
a = []
a.extend(i for i in L)
print 'generator expression extend:', time()-t
What are the options to clone or copy a list in Python?
In Python 3, a shallow copy can be made with:
a_copy = a_list.copy()
In Python 2 and 3, you can get a shallow copy with a full slice of the original:
a_copy = a_list[:]
Explanation
There are two semantic ways to copy a list. A shallow copy creates a new list of the same objects, a deep copy creates a new list containing new equivalent objects.
Shallow list copy
A shallow copy only copies the list itself, which is a container of references to the objects in the list. If the objects contained themselves are mutable and one is changed, the change will be reflected in both lists.
There are different ways to do this in Python 2 and 3. The Python 2 ways will also work in Python 3.
Python 2
In Python 2, the idiomatic way of making a shallow copy of a list is with a complete slice of the original:
a_copy = a_list[:]
You can also accomplish the same thing by passing the list through the list constructor,
Using new_list = my_list then modifies new_list every time my_list changes. Why is this?
my_list is just a name that points to the actual list in memory. When you say new_list = my_list you’re not making a copy, you’re just adding another name that points at that original list in memory. We can have similar issues when we make copies of lists.
The list is just an array of pointers to the contents, so a shallow copy just copies the pointers, and so you have two different lists, but they have the same contents. To make copies of the contents, you need a deep copy.
And so we see that the deep copied list is an entirely different list from the original. You could roll your own function – but don’t. You’re likely to create bugs you otherwise wouldn’t have by using the standard library’s deepcopy function.
Don’t use eval
You may see this used as a way to deepcopy, but don’t do it:
problematic_deep_copy = eval(repr(a_list))
It’s dangerous, particularly if you’re evaluating something from a source you don’t trust.
It’s not reliable, if a subelement you’re copying doesn’t have a representation that can be eval’d to reproduce an equivalent element.
There are many answers already that tell you how to make a proper copy, but none of them say why your original ‘copy’ failed.
Python doesn’t store values in variables; it binds names to objects. Your original assignment took the object referred to by my_list and bound it to new_list as well. No matter which name you use there is still only one list, so changes made when referring to it as my_list will persist when referring to it as new_list. Each of the other answers to this question give you different ways of creating a new object to bind to new_list.
Each element of a list acts like a name, in that each element binds non-exclusively to an object. A shallow copy creates a new list whose elements bind to the same objects as before.
new_list = list(my_list) # or my_list[:], but I prefer this syntax
# is simply a shorter way of:
new_list = [element for element in my_list]
To take your list copy one step further, copy each object that your list refers to, and bind those element copies to a new list.
import copy
# each element must have __copy__ defined for this...
new_list = [copy.copy(element) for element in my_list]
This is not yet a deep copy, because each element of a list may refer to other objects, just like the list is bound to its elements. To recursively copy every element in the list, and then each other object referred to by each element, and so on: perform a deep copy.
import copy
# each element must have __deepcopy__ defined for this...
new_list = copy.deepcopy(my_list)
See the documentation for more information about corner cases in copying.
回答 5
采用 thing[:]
>>> a =[1,2]>>> b = a[:]>>> a +=[3]>>> a
[1,2,3]>>> b
[1,2]>>>
Let’s start from the beginning and explore this question.
So let’s suppose you have two lists:
list_1=['01','98']
list_2=[['01','98']]
And we have to copy both lists, now starting from the first list:
So first let’s try by setting the variable copy to our original list, list_1:
copy=list_1
Now if you are thinking copy copied the list_1, then you are wrong. The id function can show us if two variables can point to the same object. Let’s try this:
print(id(copy))
print(id(list_1))
The output is:
4329485320
4329485320
Both variables refer to the exact same object. Are you surprised?
So as we know python doesn’t store anything in a variable, Variables are just referencing to the object and object store the value. Here object is a list but we created two references to that same object by two different variable names. This means that both variables are pointing to the same object, just with different names.
When you do copy=list_1, it is actually doing:
Here in the image list_1 and copy are two variable names but the object is same for both variable which is list
So if you try to modify copied list then it will modify the original list too because the list is only one there, you will modify that list no matter you do from the copied list or from the original list:
copy[0]="modify"
print(copy)
print(list_1)
output:
['modify', '98']
['modify', '98']
So it modified the original list :
Now let’s move onto a pythonic method for copying lists.
So as we can see our both list having different id and it means that both variables are pointing to different objects. So what actually going on here is:
Now let’s try to modify the list and let’s see if we still face the previous problem:
copy_1[0]="modify"
print(list_1)
print(copy_1)
The output is:
['01', '98']
['modify', '98']
As you can see, it only modified the copied list. That means it worked.
Do you think we’re done? No. Let’s try to copy our nested list.
copy_2=list_2[:]
copy_2 should reference another object which is a copy of list_2. Let’s check:
print(id((list_2)),id(copy_2))
We get the output:
4330403592 4330403528
Now we can assume both lists are pointing different object, so now let’s try to modify it and let’s see it is giving what we want:
copy_2[0][1]="modify"
print(list_2,copy_2)
This gives us the output:
[['01', 'modify']] [['01', 'modify']]
This may seem a little bit confusing, because the same method we previously used worked. Let’s try to understand this.
When you do:
copy_2=list_2[:]
You’re only copying the outer list, not the inside list. We can use the id function once again to check this.
print(id(copy_2[0]))
print(id(list_2[0]))
The output is:
4329485832
4329485832
When we do copy_2=list_2[:], this happens:
It creates the copy of list but only outer list copy, not the nested list copy, nested list is same for both variable, so if you try to modify the nested list then it will modify the original list too as the nested list object is same for both lists.
What is the solution? The solution is the deepcopy function.
METHOD TIME TAKEN
b =[*a]2.75180600000021
b = a *13.50215399999990
b = a[:]3.78278899999986# Python2 winner (see above)
b = a.copy()4.20556500000020# Python3 "slice equivalent" (see above)
b =[]; b.extend(a)4.68069800000012
b = a[0:len(a)]6.84498999999959*b,= a 7.54031799999984
b = list(a)7.75815899999997
b =[i for i in a]18.4886440000000
b = copy.copy(a)18.8254879999999
b =[]for item in a:
b.append(item)35.4729199999997
Here are the timing results using Python 3.6.8. Keep in mind these times are relative to one another, not absolute.
I stuck to only doing shallow copies, and also added some new methods that weren’t possible in Python2, such as list.copy() (the Python3 slice equivalent) and two forms of list unpacking (*new_list, = list and new_list = [*list]):
METHOD TIME TAKEN
b = [*a] 2.75180600000021
b = a * 1 3.50215399999990
b = a[:] 3.78278899999986 # Python2 winner (see above)
b = a.copy() 4.20556500000020 # Python3 "slice equivalent" (see above)
b = []; b.extend(a) 4.68069800000012
b = a[0:len(a)] 6.84498999999959
*b, = a 7.54031799999984
b = list(a) 7.75815899999997
b = [i for i in a] 18.4886440000000
b = copy.copy(a) 18.8254879999999
b = []
for item in a:
b.append(item) 35.4729199999997
We can see the Python2 winner still does well, but doesn’t edge out Python3 list.copy() by much, especially considering the superior readability of the latter.
The dark horse is the unpacking and repacking method (b = [*a]), which is ~25% faster than raw slicing, and more than twice as fast as the other unpacking method (*b, = a).
b = a * 1 also does surprisingly well.
Note that these methods do not output equivalent results for any input other than lists. They all work for sliceable objects, a few work for any iterable, but only copy.copy() works for more general Python objects.
All of the other contributors gave great answers, which work when you have a single dimension (leveled) list, however of the methods mentioned so far, only copy.deepcopy() works to clone/copy a list and not have it point to the nested list objects when you are working with multidimensional, nested lists (list of lists). While Felix Kling refers to it in his answer, there is a little bit more to the issue and possibly a workaround using built-ins that might prove a faster alternative to deepcopy.
While new_list = old_list[:], copy.copy(old_list) and, for Py3k, old_list.copy() work for single-leveled lists, they revert to pointing at the list objects nested within the old_list and the new_list, and changes to one of the list objects are perpetuated in the other.
Edit: New information brought to light
As was pointed out by both Aaron Hall and PM 2Ring, using eval() is not only a bad idea, it is also much slower than copy.deepcopy().
This means that for multidimensional lists, the only option is copy.deepcopy(). With that being said, it really isn’t an option as the performance goes way south when you try to use it on a moderately sized multidimensional array. I tried to timeit using a 42×42 array, not unheard of or even that large for bioinformatics applications, and I gave up on waiting for a response and just started typing my edit to this post.
It would seem that the only real option then is to initialize multiple lists and work on them independently. If anyone has any other suggestions, for how to handle multidimensional list copying, it would be appreciated.
As others have stated, there are significant performance issues using the copy module and copy.deepcopy for multidimensional lists.
>>>from copy import deepcopy
>>> a =[range(i,i+4)for i in range(3)]>>> a
[[0,1,2,3],[1,2,3,4],[2,3,4,5]]>>> b = a*1>>> c = deepcopy(a)>>>for i in(a, b, c):print i
[[0,1,2,3],[1,2,3,4],[2,3,4,5]][[0,1,2,3],[1,2,3,4],[2,3,4,5]][[0,1,2,3],[1,2,3,4],[2,3,4,5]]>>> a[2].append('99')>>>for i in(a, b, c):print i
[[0,1,2,3],[1,2,3,4],[2,3,4,5,99]][[0,1,2,3],[1,2,3,4],[2,3,4,5,99]]#Solution#1 didn't work in nested list[[0,1,2,3],[1,2,3,4],[2,3,4,5]]#Solution #2 - DeepCopy worked in nested list
A very simple approach independent of python version was missing in already given answers which you can use most of the time (at least I do):
new_list = my_list * 1 #Solution 1 when you are not using nested lists
However, If my_list contains other containers (for eg. nested lists) you must use deepcopy as others suggested in the answers above from the copy library. For example:
import copy
new_list = copy.deepcopy(my_list) #Solution 2 when you are using nested lists
Bonus: If you don’t want to copy the elements themselves (aka shallow copy), use:
new_list = my_list[:]
Let’s understand difference between Solution#1 and Solution #2
As you can see Solution #1 worked perfectly when we were not using the nested lists. Let’s check what will happen when we apply solution #1 to nested lists.
>>> from copy import deepcopy
>>> a = [range(i,i+4) for i in range(3)]
>>> a
[[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]
>>> b = a*1
>>> c = deepcopy(a)
>>> for i in (a, b, c): print i
[[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]
[[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]
[[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]
>>> a[2].append('99')
>>> for i in (a, b, c): print i
[[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 99]]
[[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 99]] #Solution#1 didn't work in nested list
[[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]] #Solution #2 - DeepCopy worked in nested list
Note that there are some cases where if you have defined your own custom class and you want to keep the attributes then you should use copy.copy() or copy.deepcopy() rather than the alternatives, for example in Python 3:
import copy
class MyList(list):
    """A plain list subclass; instances can carry extra attributes."""


lst = MyList([1, 2, 3])
lst.name = 'custom list'

# The same object next to four different kinds of copy of it.
d = {
    'original': lst,
    'slicecopy': lst[:],
    'lstcopy': lst.copy(),
    'copycopy': copy.copy(lst),
    'deepcopy': copy.deepcopy(lst),
}

# Report, for each entry, whether the custom attribute survived the copy.
for k, v in d.items():
    print('lst: {}'.format(k), end=', ')
    # Copies that come back without the attribute are reported as 'NA'.
    name = getattr(v, 'name', 'NA')
    print('name: {}'.format(name))
Outputs:
lst: original, name: custom list
lst: slicecopy, name: NA
lst: lstcopy, name: NA
lst: copycopy, name: custom list
lst: deepcopy, name: custom list
new_list = my_list
Try to understand this. Let’s say that my_list is in the heap memory at location X, i.e. my_list is pointing to X. Now by assigning new_list = my_list you’re letting new_list point to X as well. This is not a copy at all; both names refer to the same list (aliasing).
Now if you assign new_list = my_list[:], you’re copying each object reference of my_list into a new list. This is known as a shallow copy; for a deep copy, which also duplicates nested objects, use copy.deepcopy(my_list).
I wanted to post something a bit different then some of the other answers. Even though this is most likely not the most understandable, or fastest option, it provides a bit of an inside view of how deep copy works, as well as being another alternative option for deep copying. It doesn’t really matter if my function has bugs, since the point of this is to show a way to copy objects like the question answers, but also to use this as a point to explain how deepcopy works at its core.
At the core of any deep copy function is a way to make a shallow copy. How? Simple. Any deep copy function only duplicates the containers of immutable objects. When you deepcopy a nested list, you are only duplicating the outer lists, not the immutable objects inside of the lists. You are only duplicating the containers. The same works for classes, too. When you deepcopy a class, you deepcopy all of its mutable attributes. So, how? How come you only have to copy the containers, like lists, dicts, tuples, iters, classes, and class instances?
It’s simple. An immutable object can’t really be duplicated. It can never be changed, so it is only a single value. That means you never have to duplicate strings, numbers, bools, or any of those. But how would you duplicate the containers? Simple. You just initialize a new container with all of the values. Deepcopy relies on recursion. It duplicates all the containers, even ones with containers inside of them, until no containers are left. A container is a mutable object.
Once you know that, completely duplicating an object without any references is pretty easy. Here’s a function for deepcopying basic data-types (wouldn’t work for custom classes but you could always add that)
def deepcopy(x):
    """Return a recursive copy of ``x`` for basic built-in data types.

    Immutable scalars (``str``, ``bytes``, ``int``, ``bool``, ``float``,
    ``complex`` and ``None``) are returned as-is, because there is nothing
    to duplicate.  Containers (``list``, ``tuple``, ``dict``, ``set`` and
    ``frozenset``) are rebuilt as new objects whose elements are copied
    recursively.  Dictionary keys are reused unchanged since they must be
    hashable; only the values are copied.

    This is a teaching example: there is no memo of visited objects, so a
    self-referential container recurses until ``RecursionError``.

    Raises:
        TypeError: if ``x`` (or something nested inside it) is of a type
            this function does not know how to copy.  Returning ``None``
            silently here would corrupt the copy.
    """
    immutables = (str, bytes, int, bool, float, complex, type(None))
    if isinstance(x, immutables):
        return x
    if isinstance(x, tuple):
        return tuple(deepcopy(y) for y in x)
    if isinstance(x, list):
        return [deepcopy(y) for y in x]
    if isinstance(x, dict):
        return {key: deepcopy(value) for key, value in x.items()}
    if isinstance(x, frozenset):
        return frozenset(deepcopy(y) for y in x)
    if isinstance(x, set):
        return {deepcopy(y) for y in x}
    raise TypeError(
        "deepcopy() does not support objects of type %s" % type(x).__name__
    )
Python’s own copy.deepcopy (from the standard-library copy module) is based around that example. The only difference is it supports other types, and also supports user-classes by duplicating the attributes into a new duplicate class, and also blocks infinite-recursion with a reference to an object it’s already seen using a memo dictionary. And that’s really it for making deep copies. At its core, making a deep copy is just making shallow copies. I hope this answer adds something to the question.
EXAMPLES
Say you have this list: [1, 2, 3]. The immutable numbers cannot be duplicated, but the outer layer can. You can duplicate it using a list comprehension: [x for x in [1, 2, 3]]
Now, imagine you have this list: [[1, 2], [3, 4], [5, 6]]. This time, you want to make a function, which uses recursion to deep copy all layers of the list. Instead of the previous list comprehension:
[x for x in _list]
It uses a new one for lists:
[deepcopy_list(x) for x in _list]
And deepcopy_list looks like this:
def deepcopy_list(x):
    """Return a recursive copy of a (possibly nested) list of scalars.

    Scalars (``str``, ``bytes``, ``bool``, ``float``, ``int``, ``complex``
    and ``None``) are returned unchanged; anything else is iterated and
    rebuilt as a new list whose items are copied the same way.

    ``bytes`` and ``None`` are treated as scalars on purpose: iterating
    ``bytes`` would turn it into a list of integers, and ``None`` is not
    iterable at all.
    """
    atomic_types = (str, bytes, bool, float, int, complex, type(None))
    if isinstance(x, atomic_types):
        return x
    return [deepcopy_list(y) for y in x]
Now you have a function which can deepcopy any list of strs, bools, floats, ints and even lists to infinitely many layers using recursion. And there you have it, deepcopying.
TLDR: Deepcopy uses recursion to duplicate objects, and merely returns the same immutable objects as before, as immutable objects cannot be duplicated. However, it deepcopies the most inner layers of mutable objects until it reaches the outermost mutable layer of an object.
回答 15
从id和gc进入内存的实用角度。
>>> b = a = ['hell', 'word']
>>> c = ['hell', 'word']
>>> id(a), id(b), id(c)
(4424020872, 4424020872, 4423979272)
     |           |
      -----------
>>> id(a[0]), id(b[0]), id(c[0])
(4424018328, 4424018328, 4424018328)  # all referring to same 'hell'
     |           |           |
      -----------------------
>>> id(a[0][0]), id(b[0][0]), id(c[0][0])
(4422785208, 4422785208, 4422785208)  # all referring to same 'h'
     |           |           |
      -----------------------
>>> a[0] += 'o'
>>> a, b, c
(['hello', 'word'], ['hello', 'word'], ['hell', 'word'])  # b changed too
>>> id(a[0]), id(b[0]), id(c[0])
(4424018384, 4424018384, 4424018328)  # augmented assignment changed a[0], b[0]
     |           |
      -----------
>>> b = a = ['hell', 'word']
>>> id(a[0]), id(b[0]), id(c[0])
(4424018328, 4424018328, 4424018328)  # the same hell
     |           |           |
      -----------------------
>>> import gc
>>> gc.get_referrers(a[0])
[['hell', 'word'], ['hell', 'word']]  # one copy belong to a,b, the another for c
>>> gc.get_referrers(('hell'))
[['hell', 'word'], ['hell', 'word'], ('hell', None)]  # ('hello', None)
List2 isn’t storing the actual list, but a reference to list1. So when you do anything to list1, list2 changes as well. Use the copy module (part of the standard library, so no installation is needed; just `import copy`) to make an independent copy of the list (copy.copy() for simple lists, copy.deepcopy() for nested ones). This makes a copy that doesn’t change with the first list.
回答 17
deepcopy选项是唯一适用于我的方法:
from copy import deepcopy
# Compare six ways of deriving `b` from a nested list `a`.
# Each stanza rebuilds `a` (an outer list holding one list of three [1, 2]
# lists), derives `b`, replaces b[0][1] with [3], and prints both so you can
# see whether the change leaked back into `a`.

# copy.deepcopy duplicates every nesting level, so `a` stays untouched.
a = [[list(range(1, 3)) for i in range(3)]]
b = deepcopy(a)
b[0][1] = [3]
print('Deep:')
print(a)
print(b)
print('-----------------------------')

# Multiplying by 1 builds a new outer list that still shares a[0].
a = [[list(range(1, 3)) for i in range(3)]]
b = a * 1
b[0][1] = [3]
print('*1:')
print(a)
print(b)
print('-----------------------------')

# A full slice copies only the outer list.
a = [[list(range(1, 3)) for i in range(3)]]
b = a[:]
b[0][1] = [3]
print('Vector copy:')
print(a)
print(b)
print('-----------------------------')

# list(a) copies only the outer list.
a = [[list(range(1, 3)) for i in range(3)]]
b = list(a)
b[0][1] = [3]
print('List copy:')
print(a)
print(b)
print('-----------------------------')

# list.copy() copies only the outer list.
a = [[list(range(1, 3)) for i in range(3)]]
b = a.copy()
b[0][1] = [3]
print('.copy():')
print(a)
print(b)
print('-----------------------------')

# Plain assignment makes `b` another name for the very same list object.
a = [[list(range(1, 3)) for i in range(3)]]
b = a
b[0][1] = [3]
print('Shallow:')
print(a)
print(b)
print('-----------------------------')
The deepcopy option is the only method that works for me:
from copy import deepcopy
# Table of (printed label, how `b` is derived from `a`), in the order the
# comparisons are reported.
_COPY_STRATEGIES = (
    ('Deep:', deepcopy),
    ('*1:', lambda nested: nested * 1),
    ('Vector copy:', lambda nested: nested[:]),
    ('List copy:', list),
    ('.copy():', lambda nested: nested.copy()),
    ('Shallow:', lambda nested: nested),
)

for _label, _derive in _COPY_STRATEGIES:
    # Start from a fresh nested list each round so the runs are independent.
    a = [[list(range(1, 3)) for i in range(3)]]
    b = _derive(a)
    # Mutate through `b`; printing `a` afterwards shows whether the inner
    # list is shared between the two.
    b[0][1] = [3]
    print(_label)
    print(a)
    print(b)
    print('-----------------------------')