from collections import defaultdict
d = defaultdict(defaultdict)
a =[("key1",{"a1":22,"a2":33}),("key2",{"a1":32,"a2":55}),("key3",{"a1":43,"a2":44})]for i in a:
d[i[0]]= i[1]
Thanks to some great folks on SO, I discovered the possibilities offered by collections.defaultdict, notably in readability and speed. I have put them to use with success.
Now I would like to implement three levels of dictionaries, the two top ones being defaultdict and the lowest one being int. I don’t find the appropriate way to do this. Here is my attempt:
from collections import defaultdict
d = defaultdict(defaultdict)
a = [("key1", {"a1":22, "a2":33}),
("key2", {"a1":32, "a2":55}),
("key3", {"a1":43, "a2":44})]
for i in a:
d[i[0]] = i[1]
Now this works, but the following, which is the desired behavior, doesn’t:
d["key4"]["a1"] + 1
I suspect that I should have declared somewhere that the second level defaultdict is of type int, but I didn’t find where or how to do so.
The reason I am using defaultdict in the first place is to avoid having to initialize the dictionary for each new key.
Any more elegant suggestion?
Thanks pythoneers!
回答 0
用:
from collections import defaultdict
d = defaultdict(lambda: defaultdict(int))
Another way to make a pickleable, nested defaultdict is to use a partial object instead of a lambda:
from functools import partial
...
d = defaultdict(partial(defaultdict, int))
This will work because the defaultdict class is globally accessible at the module level:
“You can’t pickle a partial object unless the function [or in this
case, class] it wraps is globally accessible … under its __name__
(within its __module__)”
— Pickling wrapped partial functions
classAutoVivification(dict):"""Implementation of perl's autovivification feature."""def __getitem__(self, item):try:return dict.__getitem__(self, item)exceptKeyError:
value = self[item]= type(self)()return value
测试:
a =AutoVivification()
a[1][2][3]=4
a[1][3][3]=5
a[1][2]['test']=6print a
from numbers importNumberclass autovivify(dict):def __missing__(self, key):
value = self[key]= type(self)()return valuedef __add__(self, x):""" override addition for numeric types when self is empty """ifnot self and isinstance(x,Number):return xraiseValueErrordef __sub__(self, x):ifnot self and isinstance(x,Number):return-1* xraiseValueError
As per @rschwieb’s request for D['key'] += 1, we can expand on previous by overriding addition by defining __add__ method, to make this behave more like a collections.Counter()
First __missing__ will be called to create a new empty value, which will be passed into __add__. We test the value, counting on empty values to be False.
from numbers import Number
class autovivify(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
def __add__(self, x):
""" override addition for numeric types when self is empty """
if not self and isinstance(x, Number):
return x
raise ValueError
def __sub__(self, x):
if not self and isinstance(x, Number):
return -1 * x
raise ValueError
Examples:
>>> import autovivify
>>> a = autovivify.autovivify()
>>> a
{}
>>> a[2]
{}
>>> a
{2: {}}
>>> a[4] += 1
>>> a[5][3][2] -= 1
>>> a
{2: {}, 4: 1, 5: {3: {2: -1}}}
Rather than checking argument is a Number (very non-python, amirite!) we could just provide a default 0 value and then attempt the operation:
class av2(dict):
def __missing__(self, key):
value = self[key] = type(self)()
return value
def __add__(self, x):
""" override addition when self is empty """
if not self:
return 0 + x
raise ValueError
def __sub__(self, x):
""" override subtraction when self is empty """
if not self:
return 0 - x
raise ValueError
回答 4
晚会晚了,但是对于任意深度,我只是发现自己在做这样的事情:
from collections import defaultdict
classDeepDict(defaultdict):def __call__(self):returnDeepDict(self.default_factory)
I’m trying to make a set of sets in Python. I can’t figure out how to do it.
Starting with the empty set xx:
xx = set([])
# Now we have some other set, for example
elements = set([2,3,4])
xx.add(elements)
but I get
TypeError: unhashable type: 'list'
or
TypeError: unhashable type: 'set'
Is it possible to have a set of sets in Python?
I am dealing with a large collection of sets and I want to be able to not have to deal duplicate sets (a set B of sets A1, A2, …., An would “cancel” two sets if Ai = Aj)
Python’s complaining because the inner set objects are mutable and thus not hashable. The solution is to use frozenset for the inner sets, to indicate that you have no intention of modifying them.
So I had the exact same problem. I wanted to make a data structure that works as a set of sets. The problem is that the sets must contain immutable objects. So, what you can do is simply make it as a set of tuples. That worked fine for me!
A = set()
A.add( (2,3,4) )##adds the element
A.add( (2,3,4) )##does not add the same element
A.add( (2,3,5) )##adds the element, because it is different!
classOuter(object):def __init__(self):
self.outer_var =1def get_inner(self):return self.Inner(self)# "self.Inner" is because Inner is a class attribute of this class# "Outer.Inner" would also work, or move Inner to global scope# and then just use "Inner"classInner(object):def __init__(self, outer):
self.outer = outer
@propertydef inner_var(self):return self.outer.outer_var
class Outer(object):
outer_var = 1
class Inner(object):
@property
def inner_var(self):
return Outer.outer_var
This isn’t quite the same as similar things work in other languages, and uses global lookup instead of scoping the access to outer_var. (If you change what object the name Outer is bound to, then this code will use that object the next time it is executed.)
If you instead want all Inner objects to have a reference to an Outer because outer_var is really an instance attribute:
class Outer(object):
def __init__(self):
self.outer_var = 1
def get_inner(self):
return self.Inner(self)
# "self.Inner" is because Inner is a class attribute of this class
# "Outer.Inner" would also work, or move Inner to global scope
# and then just use "Inner"
class Inner(object):
def __init__(self, outer):
self.outer = outer
@property
def inner_var(self):
return self.outer.outer_var
Note that nesting classes is somewhat uncommon in Python, and doesn’t automatically imply any sort of special relationship between the classes. You’re better off not nesting. (You can still set a class attribute on Outer to Inner, if you want.)
class OuterClass:
outer_var = 1
class InnerClass:
pass
InnerClass.inner_var = outer_var
The problem you encountered is due to this:
A block is a piece of Python program text that is executed as a unit.
The following are blocks: a module, a function body, and a class
definition.
(…)
A scope defines the visibility of a name within
a block.
(…)
The scope of names defined in a class block is
limited to the class block; it does not extend to the code blocks of
methods – this includes generator expressions since they are
implemented using a function scope. This means that the following will
fail:
class A:
a = 42
b = list(a + i for i in range(10))
The above means:
a function body is a code block and a method is a function, then names defined out of the function body present in a class definition do not extend to the function body.
Paraphrasing this for your case:
a class definition is a code block, then names defined out of the inner class definition present in an outer class definition do not extend to the inner class definition.
回答 2
如果您不使用嵌套类,则可能会更好。如果必须嵌套,请尝试以下操作:
x =1classOuterClass:
outer_var = x
classInnerClass:
inner_var = x
All explanations can be found in Python Documentation The Python Tutorial
For your first error <type 'exceptions.NameError'>: name 'outer_var' is not defined. The explanation is:
There is no shorthand for referencing data attributes (or other methods!) from within methods. I find that this actually increases the readability of methods: there is no chance of confusing local variables and instance variables when glancing through a method.
quoted from The Python Tutorial 9.4
For your second error <type 'exceptions.NameError'>: name 'OuterClass' is not defined
When a class definition is left normally (via the end), a class object is created.
quoted from The Python Tutorial 9.3.1
So when you try inner_var = Outerclass.outer_var, the Quterclass hasn’t been created yet, that’s why name 'OuterClass' is not defined
A more detailed but tedious explanation for your first error:
Although classes have access to enclosing functions’ scopes, though, they do not act
as enclosing scopes to code nested within the class: Python searches enclosing functions
for referenced names, but never any enclosing classes. That is, a class is a local scope
and has access to enclosing local scopes, but it does not serve as an enclosing local scope
to further nested code.
try:
something()exceptSomeErroras e:try:
plan_B()exceptAlsoFailsError:raise e # I'd like to raise the SomeError as if plan_B()# didn't raise the AlsoFailsError
I know that if I want to re-raise an exception, I simple use raise without arguments in the respective except block. But given a nested expression like
try:
something()
except SomeError as e:
try:
plan_B()
except AlsoFailsError:
raise e # I'd like to raise the SomeError as if plan_B()
# didn't raise the AlsoFailsError
how can I re-raise the SomeError without breaking the stack trace? raise alone would in this case re-raise the more recent AlsoFailsError. Or how could I refactor my code to avoid this issue?
回答 0
从Python 3开始,回溯存储在异常中,因此raise e(大多数)正确的事情很简单:
try:
something()exceptSomeErroras e:try:
plan_B()exceptAlsoFailsError:raise e # or raise e from None - see below
产生的追溯将包括SomeError在处理过程中发生的其他通知AlsoFailsError(由于位于raise e内部except AlsoFailsError)。这具有误导性,因为实际发生的是相反的情况-我们AlsoFailsError在尝试从恢复时遇到并处理了它SomeError。要获取不包含的回溯AlsoFailsError,请替换raise e为raise e from None。
As of Python 3 the traceback is stored in the exception, so a simple raise e will do the (mostly) right thing:
try:
something()
except SomeError as e:
try:
plan_B()
except AlsoFailsError:
raise e # or raise e from None - see below
The traceback produced will include an additional notice that SomeError occurred while handling AlsoFailsError (because of raise e being inside except AlsoFailsError). This is misleading because what actually happened is the other way around – we encountered AlsoFailsError, and handled it, while trying to recover from SomeError. To obtain a traceback that doesn’t include AlsoFailsError, replace raise e with raise e from None.
In Python 2 you’d store the exception type, value, and traceback in local variables and use the three-argument form of raise:
Python 3.5+ attaches the traceback information to the error anyway, so it’s no longer necessary to save it separately.
>>> def f():
... try:
... raise SyntaxError
... except Exception as e:
... err = e
... try:
... raise AttributeError
... except Exception as e1:
... raise err from None
>>> f()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 9, in f
File "<stdin>", line 3, in f
SyntaxError: None
>>>
Wrote this function in python that transposes a matrix:
def transpose(m):
height = len(m)
width = len(m[0])
return [ [ m[i][j] for i in range(0, height) ] for j in range(0, width) ]
In the process I realized I don’t fully understand how single line nested for loops execute. Please help me understand by answering the following questions:
What is the order in which this for loop executes?
If I had a triple nested for loop, what order would it execute?
What would be equal the equal unnested for loop?
Given,
[ function(i,j) for i,j in object ]
What type must object be in order to use this for loop structure?
What is the order in which i and j are assigned to elements in object?
Can it be simulated by a different for loop structure?
Can this for loop be nested with a similar or different structure for loop? And how would it look?
The best source of information is the official Python tutorial on list comprehensions. List comprehensions are nearly the same as for loops (certainly any list comprehension can be written as a for-loop) but they are often faster than using a for loop.
Look at this longer list comprehension from the tutorial (the if part filters the comprehension, only parts that pass the if statement are passed into the final part of the list comprehension (here (x,y)):
>>> [(x, y) for x in [1,2,3] for y in [3,1,4] if x != y]
[(1, 3), (1, 4), (2, 3), (2, 1), (2, 4), (3, 1), (3, 4)]
It’s exactly the same as this nested for loop (and, as the tutorial says, note how the order of for and if are the same).
>>> combs = []
>>> for x in [1,2,3]:
... for y in [3,1,4]:
... if x != y:
... combs.append((x, y))
...
>>> combs
[(1, 3), (1, 4), (2, 3), (2, 1), (2, 4), (3, 1), (3, 4)]
The major difference between a list comprehension and a for loop is that the final part of the for loop (where you do something) comes at the beginning rather than at the end.
On to your questions:
What type must object be in order to use this for loop structure?
An iterable. Any object that can generate a (finite) set of elements. These include any container, lists, sets, generators, etc.
What is the order in which i and j are assigned to elements in object?
They are assigned in exactly the same order as they are generated from each list, as if they were in a nested for loop (for your first comprehension you’d get 1 element for i, then every value from j, 2nd element into i, then every value from j, etc.)
Can it be simulated by a different for loop structure?
Yes, already shown above.
Can this for loop be nested with a similar or different structure for loop? And how would it look?
Sure, but it’s not a great idea. Here, for example, gives you a list of lists of characters:
[[ch for ch in word] for word in ("apple", "banana", "pear", "the", "hello")]
You might be interested in itertools.product, which returns an iterable yielding tuples of values from all the iterables you pass it. That is, itertools.product(A, B) yields all values of the form (a, b), where the a values come from A and the b values come from B. For example:
import itertools
A = [50, 60, 70]
B = [0.1, 0.2, 0.3, 0.4]
print [a + b for a, b in itertools.product(A, B)]
Notice how the final argument passed to itertools.product is the “inner” one. Generally, itertools.product(a0, a1, ... an) is equal to [(i0, i1, ... in) for in in an for in-1 in an-1 ... for i0 in a0]
Below code for best examples for nested loops, while using two for loops please remember the output of the first loop is input for the second loop.
Loop termination also important while using the nested loops
for x in range(1, 10, 1):
for y in range(1,x):
print y,
print
OutPut :
1
1 2
1 2 3
1 2 3 4
1 2 3 4 5
1 2 3 4 5 6
1 2 3 4 5 6 7
1 2 3 4 5 6 7 8
classOuter(object):def some_method(self):# do somethingclassInner(object):def __init__(self):
self.Outer.some_method()# <-- this is the line in question
class Outer(object):
def some_method(self):
# do something
class Inner(object):
def __init__(self):
self.Outer.some_method() # <-- this is the line in question
How can I access the Outer class’s method from the Inner class?
The methods of a nested class cannot directly access the instance attributes of the outer class.
Note that it is not necessarily the case that an instance of the outer class exists even when you have created an instance of the inner class.
In fact, it is often recommended against using nested classes, since the nesting does not imply any particular relationship between the inner and outer classes.
You’re trying to access Outer’s class instance, from inner class instance. So just use factory-method to build Inner instance and pass Outer instance to it.
class Outer(object):
def createInner(self):
return Outer.Inner(self)
class Inner(object):
def __init__(self, outer_instance):
self.outer_instance = outer_instance
self.outer_instance.somemethod()
def inner_method(self):
self.outer_instance.anothermethod()
even push the boat out a bit and extend this inner class (NB to get super() to work you have to change the class signature of mooble to “class mooble( object )”
mrh1997 raised an interesting point about the non-common inheritance of inner classes delivered using this technique. But it seems that the solution is pretty straightforward:
Do you mean to use inheritance, rather than nesting classes like this? What you’re doing doesn’t make a heap of sense in Python.
You can access the Outer‘s some_method by just referencing Outer.some_method within the inner class’s methods, but it’s not going to work as you expect it will. For example, if you try this:
class Outer(object):
def some_method(self):
# do something
class Inner(object):
def __init__(self):
Outer.some_method()
…you’ll get a TypeError when initialising an Inner object, because Outer.some_method expects to receive an Outer instance as its first argument. (In the example above, you’re basically trying to call some_method as a class method of Outer.)
You can easily access to outer class using metaclass: after creation of outer class check it’s attribute dict for any classes (or apply any logic you need – mine is just trivial example) and set corresponding values:
import six
import inspect
# helper method from `peewee` project to add metaclass
_METACLASS_ = '_metaclass_helper_'
def with_metaclass(meta, base=object):
return meta(_METACLASS_, (base,), {})
class OuterMeta(type):
def __new__(mcs, name, parents, dct):
cls = super(OuterMeta, mcs).__new__(mcs, name, parents, dct)
for klass in dct.values():
if inspect.isclass(klass):
print("Setting outer of '%s' to '%s'" % (klass, cls))
klass.outer = cls
return cls
# @six.add_metaclass(OuterMeta) -- this is alternative to `with_metaclass`
class Outer(with_metaclass(OuterMeta)):
def foo(self):
return "I'm outer class!"
class Inner(object):
outer = None # <-- by default it's None
def bar(self):
return "I'm inner class"
print(Outer.Inner.outer)
>>> <class '__main__.Outer'>
assert isinstance(Outer.Inner.outer(), Outer)
print(Outer().foo())
>>> I'm outer class!
print(Outer.Inner.outer().foo())
>>> I'm outer class!
print(Outer.Inner().outer().foo())
>>> I'm outer class!
print(Outer.Inner().bar())
>>> I'm inner class!
Using this approach, you can easily bind and refer two classes between each other.
class higher_level__unknown_irrelevant_name__class:def __init__(self,...args...):...other code...# Important lines to access sub-classes.
subclasses = self._subclass_container()
self.some_subclass = subclasses["some_subclass"]del subclasses # Free up variable for other use.def sub_function(self,...args...):...other code...def _subclass_container(self):
_parent_class = self # Create access to parent class.class some_subclass:def __init__(self):
self._parent_class = _parent_class # Easy access from self.# Optional line, clears variable space, but SHOULD NOT BE USED# IF THERE ARE MULTIPLE SUBCLASSES as would stop their parent access.# del _parent_classclass subclass_2:def __init__(self):
self._parent_class = _parent_class
# Return reference(s) to the subclass(es).return{"some_subclass": some_subclass,"subclass_2": subclass_2}
I’ve created some Python code to use an outer class from its inner class, based on a good idea from another answer for this question. I think it’s short, simple and easy to understand.
class higher_level__unknown_irrelevant_name__class:
def __init__(self, ...args...):
...other code...
# Important lines to access sub-classes.
subclasses = self._subclass_container()
self.some_subclass = subclasses["some_subclass"]
del subclasses # Free up variable for other use.
def sub_function(self, ...args...):
...other code...
def _subclass_container(self):
_parent_class = self # Create access to parent class.
class some_subclass:
def __init__(self):
self._parent_class = _parent_class # Easy access from self.
# Optional line, clears variable space, but SHOULD NOT BE USED
# IF THERE ARE MULTIPLE SUBCLASSES as would stop their parent access.
# del _parent_class
class subclass_2:
def __init__(self):
self._parent_class = _parent_class
# Return reference(s) to the subclass(es).
return {"some_subclass": some_subclass, "subclass_2": subclass_2}
The main code, “production ready” (without comments, etc.). Remember to replace all of each value in angle brackets (e.g. <x>) with the desired value.
class <higher_level_class>:
def __init__(self):
subclasses = self._subclass_container()
self.<sub_class> = subclasses[<sub_class, type string>]
del subclasses
def _subclass_container(self):
_parent_class = self
class <sub_class>:
def __init__(self):
self._parent_class = _parent_class
return {<sub_class, type string>: <sub_class>}
Explanation of how this method works (the basic steps):
Create a function named _subclass_container to act as a wrapper to access the variable self, a reference to the higher level class (from code running inside the function).
Create a variable named _parent_class which is a reference to the variable self of this function, that the sub-classes of _subclass_container can access (avoids name conflicts with other self variables in subclasses).
Return the sub-class/sub-classes as a dictionary/list so code calling the _subclass_container function can access the sub-classes inside.
In the __init__ function inside the higher level class (or wherever else needed), receive the returned sub-classes from the function _subclass_container into the variable subclasses.
Assign sub-classes stored in the subclasses variable to attributes of the higher level class.
A few tips to make scenarios easier:
Making the code to assign the sub classes to the higher level class easier to copy and be used in classes derived from the higher level class that have their__init__function changed:
Insert before line 12 in the main code:
def _subclass_init(self):
Then insert into this function lines 5-6 (of the main code) and replace lines 4-7 with the following code:
self._subclass_init(self)
Making subclass assigning to the higher level class possible when there are many/unknown quantities of subclasses.
Replace line 6 with the following code:
for subclass_name in list(subclasses.keys()):
setattr(self, subclass_name, subclasses[subclass_name])
Example scenario of where this solution would be useful and where the higher level class name should be impossible to get:
A class, named “a” (class a:) is created. It has subclasses that need to access it (the parent). One subclass is called “x1”. In this subclass, the code a.run_func() is run.
Then another class, named “b” is created, derived from class “a” (class b(a):). After that, some code runs b.x1() (calling the sub function “x1” of b, a derived sub-class). This function runs a.run_func(), calling the function “run_func” of class “a”, not the function “run_func” of its parent, “b” (as it should), because the function which was defined in class “a” is set to refer to the function of class “a”, as that was its parent.
This would cause problems (e.g. if function a.run_func has been deleted) and the only solution without rewriting the code in class a.x1 would be to redefine the sub-class x1 with updated code for all classes derived from class “a” which would obviously be difficult and not worth it.
classOuter(object):@staticmethoddef some_static_method(self):# do somethingclassInner(object):def __init__(self):
self.some_static_method()# <-- this will work laterInner.some_static_method = some_static_method
Expanding on @tsnorri’s cogent thinking, that the outer method may be a static method:
class Outer(object):
@staticmethod
def some_static_method(self):
# do something
class Inner(object):
def __init__(self):
self.some_static_method() # <-- this will work later
Inner.some_static_method = some_static_method
Now the line in question should work by the time it is actually called.
The last line in the above code gives the Inner class a static method that’s a clone of the Outer static method.
Usually, the local scope references the local names of the (textually) current function.
…or current class in our case. So objects “local” to the definition of the Outer class (Inner and some_static_method) may be referred to directly within that definition.
classParent():def __init__(self, name):
self.name = name
self.children =[]classInner(object):passdefChild(self, name):
parent = self
classChild(Parent.Inner):def __init__(self, name):
self.name = name
self.parent = parent
parent.children.append(self)returnChild(name)
parent =Parent('Bar')
child1 = parent.Child('Foo')
child2 = parent.Child('World')print(# Getting its first childs name
child1.name,# From itself
parent.children[0].name,# From its parent# Also works with the second child
child2.name,
parent.children[1].name,# Go nuts if you want
child2.parent.children[0].name,
child1.parent.children[1].name
)print(# Getting the parents name
parent.name,# From itself
child1.parent.name,# From its children
child2.parent.name,# Go nuts again if you want
parent.children[0].parent.name,
parent.children[1].parent.name,# Or insane
child2.parent.children[0].parent.children[1].parent.name,
child1.parent.children[1].parent.children[0].parent.name
)# Second parent? No problem
parent2 =Parent('John')
child3 = parent2.Child('Doe')
child4 = parent2.Child('Appleseed')print(
child3.name, parent2.children[0].name,
child4.name, parent2.children[1].name,
parent2.name # ....)
A few years late to the party…. but to expand on @mike rodent‘s wonderful answer, I’ve provided my own example below that shows just how flexible his solution is, and why it should be (or should have been) the accepted answer.
Python 3.7
class Parent():
def __init__(self, name):
self.name = name
self.children = []
class Inner(object):
pass
def Child(self, name):
parent = self
class Child(Parent.Inner):
def __init__(self, name):
self.name = name
self.parent = parent
parent.children.append(self)
return Child(name)
parent = Parent('Bar')
child1 = parent.Child('Foo')
child2 = parent.Child('World')
print(
# Getting its first childs name
child1.name, # From itself
parent.children[0].name, # From its parent
# Also works with the second child
child2.name,
parent.children[1].name,
# Go nuts if you want
child2.parent.children[0].name,
child1.parent.children[1].name
)
print(
# Getting the parents name
parent.name, # From itself
child1.parent.name, # From its children
child2.parent.name,
# Go nuts again if you want
parent.children[0].parent.name,
parent.children[1].parent.name,
# Or insane
child2.parent.children[0].parent.children[1].parent.name,
child1.parent.children[1].parent.children[0].parent.name
)
# Second parent? No problem
parent2 = Parent('John')
child3 = parent2.Child('Doe')
child4 = parent2.Child('Appleseed')
print(
child3.name, parent2.children[0].name,
child4.name, parent2.children[1].name,
parent2.name # ....
)
Output:
Foo Foo World World Foo World
Bar Bar Bar Bar Bar Bar Bar
Doe Doe Appleseed Appleseed John
Again, a wonderful answer, props to you mike!
回答 10
这太简单了:
输入:
class A:def __init__(self):passdef func1(self):print('class A func1')class B:def __init__(self):
a1 = A()
a1.func1()def func1(self):print('class B func1')
b = A.B()
b.func1()
‘Mapping’ file has 4 columns: Device_Name, GDN, Device_Type, and Device_OS. All four columns are populated.
‘Data’ file has these same columns, with Device_Name column populated and the other three columns blank.
I want my Python code to open both files and for each Device_Name in the Data file, map its GDN, Device_Type, and Device_OS value from the Mapping file.
I know how to use dict when only 2 columns are present (1 is needed to be mapped) but I don’t know how to accomplish this when 3 columns need to be mapped.
Following is the code using which I tried to accomplish mapping of Device_Type:
x = dict([])
with open("Pricing Mapping_2013-04-22.csv", "rb") as in_file1:
file_map = csv.reader(in_file1, delimiter=',')
for row in file_map:
typemap = [row[0],row[2]]
x.append(typemap)
with open("Pricing_Updated_Cleaned.csv", "rb") as in_file2, open("Data Scraper_GDN.csv", "wb") as out_file:
writer = csv.writer(out_file, delimiter=',')
for row in csv.reader(in_file2, delimiter=','):
try:
row[27] = x[row[11]]
except KeyError:
row[27] = ""
writer.writerow(row)
It returns Attribute Error.
After some researching, I think I need to create a nested dict, but I don’t have any idea how to do this.
回答 0
嵌套字典是字典中的字典。非常简单的事情。
>>> d ={}>>> d['dict1']={}>>> d['dict1']['innerkey']='value'>>> d
{'dict1':{'innerkey':'value'}}
>>>import collections
>>> d = collections.defaultdict(dict)>>> d['dict1']['innerkey']='value'>>> d # currently a defaultdict type
defaultdict(<type 'dict'>,{'dict1':{'innerkey':'value'}})>>> dict(d)# but is exactly like a normal dictionary.{'dict1':{'innerkey':'value'}}
您可以根据需要填充。
我建议在你的代码的东西像下面:
d ={}# can use defaultdict(dict) insteadfor row in file_map:# derive row key from something # when using defaultdict, we can skip the next step creating a dictionary on row_key
d[row_key]={}for idx, col in enumerate(row):
d[row_key][idx]= col
a_file ="path/to/a.csv"
b_file ="path/to/b.csv"# read from file a.csvwith open(a_file)as f:# skip headers
f.next()# get first colum as keys
keys =(line.split(',')[0]for line in f)# create empty dictionary:
d ={}# read from file b.csvwith open(b_file)as f:# gather headers except first key header
headers = f.next().split(',')[1:]# iterate linesfor line in f:# gather the colums
cols = line.strip().split(',')# check to make sure this key should be mapped.if cols[0]notin keys:continue# add key to dict
d[cols[0]]= dict(# inner keys are the header names, values are columns(headers[idx], v)for idx, v in enumerate(cols[1:]))
A nested dict is a dictionary within a dictionary. A very simple thing.
>>> d = {}
>>> d['dict1'] = {}
>>> d['dict1']['innerkey'] = 'value'
>>> d
{'dict1': {'innerkey': 'value'}}
You can also use a defaultdict from the collections package to facilitate creating nested dictionaries.
>>> import collections
>>> d = collections.defaultdict(dict)
>>> d['dict1']['innerkey'] = 'value'
>>> d # currently a defaultdict type
defaultdict(<type 'dict'>, {'dict1': {'innerkey': 'value'}})
>>> dict(d) # but is exactly like a normal dictionary.
{'dict1': {'innerkey': 'value'}}
You can populate that however you want.
I would recommend in your code something like the following:
d = {} # can use defaultdict(dict) instead
for row in file_map:
# derive row key from something
# when using defaultdict, we can skip the next step creating a dictionary on row_key
d[row_key] = {}
for idx, col in enumerate(row):
d[row_key][idx] = col
may be above code is confusing the question. My problem in nutshell: I
have 2 files a.csv b.csv, a.csv has 4 columns i j k l, b.csv also has
these columns. i is kind of key columns for these csvs’. j k l column
is empty in a.csv but populated in b.csv. I want to map values of j k
l columns using ‘i` as key column from b.csv to a.csv file
My suggestion would be something like this (without using defaultdict):
a_file = "path/to/a.csv"
b_file = "path/to/b.csv"
# read from file a.csv
with open(a_file) as f:
# skip headers
f.next()
# get first colum as keys
keys = (line.split(',')[0] for line in f)
# create empty dictionary:
d = {}
# read from file b.csv
with open(b_file) as f:
# gather headers except first key header
headers = f.next().split(',')[1:]
# iterate lines
for line in f:
# gather the colums
cols = line.strip().split(',')
# check to make sure this key should be mapped.
if cols[0] not in keys:
continue
# add key to dict
d[cols[0]] = dict(
# inner keys are the header names, values are columns
(headers[idx], v) for idx, v in enumerate(cols[1:]))
Please note though, that for parsing csv files there is a csv module.
In [2]: def nested_dict():
...: return collections.defaultdict(nested_dict)
...:
In [3]: a = nested_dict()
In [4]: a
Out[4]: defaultdict(<function __main__.nested_dict>, {})
In [5]: a['a']['b']['c'] = 1
In [6]: a
Out[6]:
defaultdict(<function __main__.nested_dict>,
{'a': defaultdict(<function __main__.nested_dict>,
{'b': defaultdict(<function __main__.nested_dict>,
{'c': 1})})})
import nested_dict as nd
nest = nd.nested_dict()
nest['outer1']['inner1']='v11'
nest['outer1']['inner2']='v12'print('original nested dict: \n', nest)try:
nest['outer1']['wrong_key1']exceptKeyErroras e:print('exception missing key', e)print('nested dict after lookup with missing key. no exception raised:\n', nest)# Instead, convert back to normal dict...
nest_d = nest.to_dict(nest)try:print('converted to normal dict. Trying to lookup Wrong_key2')
nest_d['outer1']['wrong_key2']exceptKeyErroras e:print('exception missing key', e)else:print(' no exception raised:\n')# ...or use dict.keys to check if key in nested dictprint('checking with dict.keys')print(list(nest['outer1'].keys()))if'wrong_key3'in list(nest.keys()):print('found wrong_key3')else:print(' did not find wrong_key3')
输出为:
original nested dict:{"outer1":{"inner2":"v12","inner1":"v11"}}
nested dict after lookup with missing key. no exception raised:{"outer1":{"wrong_key1":{},"inner2":"v12","inner1":"v11"}}
converted to normal dict.Trying to lookup Wrong_key2
exception missing key 'wrong_key2'
checking with dict.keys
['wrong_key1','inner2','inner1']
did not find wrong_key3
It is important to remember when using defaultdict and similar nested dict modules such as nested_dict, that looking up a nonexistent key may inadvertently create a new key entry in the dict and cause a lot of havoc.
Here is a Python3 example with nested_dict module:
import nested_dict as nd
nest = nd.nested_dict()
nest['outer1']['inner1'] = 'v11'
nest['outer1']['inner2'] = 'v12'
print('original nested dict: \n', nest)
try:
nest['outer1']['wrong_key1']
except KeyError as e:
print('exception missing key', e)
print('nested dict after lookup with missing key. no exception raised:\n', nest)
# Instead, convert back to normal dict...
nest_d = nest.to_dict(nest)
try:
print('converted to normal dict. Trying to lookup Wrong_key2')
nest_d['outer1']['wrong_key2']
except KeyError as e:
print('exception missing key', e)
else:
print(' no exception raised:\n')
# ...or use dict.keys to check if key in nested dict
print('checking with dict.keys')
print(list(nest['outer1'].keys()))
if 'wrong_key3' in list(nest.keys()):
print('found wrong_key3')
else:
print(' did not find wrong_key3')
Output is:
original nested dict: {"outer1": {"inner2": "v12", "inner1": "v11"}}
nested dict after lookup with missing key. no exception raised:
{"outer1": {"wrong_key1": {}, "inner2": "v12", "inner1": "v11"}}
converted to normal dict.
Trying to lookup Wrong_key2
exception missing key 'wrong_key2'
checking with dict.keys
['wrong_key1', 'inner2', 'inner1']
did not find wrong_key3
Had the same issue and got curious about the performance of each solution.
Here’s is the %timeit:
import numpy as np
lst = [['a','b','c'], [1,2,3], ['x','y','z']]
The first numpy-way, transforming the array:
%timeit list(np.array(lst).T[0])
4.9 µs ± 163 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Fully native using list comprehension (as explained by @alecxe):
%timeit [item[0] for item in lst]
379 ns ± 23.1 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
Another native way using zip (as explained by @dawg):
%timeit list(zip(*lst))[0]
585 ns ± 7.26 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
Second numpy-way. Also explained by @dawg:
%timeit list(np.array(lst)[:,0])
4.95 µs ± 179 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Surprisingly (well, at least for me) the native way using list comprehension is the fastest and about 10x faster than the numpy-way. Running the two numpy-ways without the final list saves about one µs which is still in the 10x difference.
Note that, when I surrounded each code snippet with a call to len, to ensure that Generators run till the end, the timing stayed the same.
Python includes a function called itemgetter to return the item at a specific index in a list:
from operator import itemgetter
Pass the itemgetter() function the index of the item you want to retrieve. To retrieve the first item, you would use itemgetter(0). The important thing to understand is that itemgetter(0) itself returns a function. If you pass a list to that function, you get the specific item:
itemgetter(0)([10, 20, 30]) # Returns 10
This is useful when you combine it with map(), which takes a function as its first argument, and a list (or any other iterable) as the second argument. It returns the result of calling the function on each object in the iterable:
Right now you are appending the generator object to your second list.
>>> lst2.append(item[0] for item in lst)
>>> lst2
[1, 2, 3, <generator object <genexpr> at 0xb74b3554>]
But you probably want it to be a list of first items
>>> lst2.append([item[0] for item in lst])
>>> lst2
[1, 2, 3, ['a', 1, 'x']]
Now we appended the list of first items to the existing list. If you’d like to add the items themeselves, not a list of them, to the existing ones, you’d use list.extend. In that case we don’t have to worry about adding a generator, because extend will use that generator to add each item it gets from there, to extend the current list.
>>> lst2.extend(item[0] for item in lst)
>>> lst2
[1, 2, 3, 'a', 1, 'x']
or
>>> lst2 + [x[0] for x in lst]
[1, 2, 3, 'a', 1, 'x']
>>> lst2
[1, 2, 3]
l =[['40','20','10','30'],['20','20','20','20','20','30','20'],['30','20','30','50','10','30','20','20','20'],['100','100'],['100','100','100','100','100'],['100','100','100','100']]
现在,我要做的是将列表中的每个元素转换为float。我的解决方案是这样的:
newList =[]for x in l:for y in x:
newList.append(float(y))
Here is how you would do this with a nested list comprehension:
[[float(y) for y in x] for x in l]
This would give you a list of lists, similar to what you started with except with floats instead of strings. If you want one flat list then you would use [float(y) for x in l for y in x].
回答 1
以下是将嵌套的for循环转换为嵌套列表理解的方法:
以下是嵌套列表推导的工作方式:
l a b c d e f
↓↓↓↓↓↓↓In[1]: l =[[[[[[1]]]]]]In[2]:for a in l:...:for b in a:...:for c in b:...:for d in c:...:for e in d:...:for f in e:...:print(float(f))...:1.0In[3]:[float(f)for a in l
...:for b in a
...:for c in b
...:for d in c
...:for e in d
...:for f in e]Out[3]:[1.0]
Here is how to convert nested for loop to nested list comprehension:
Here is how nested list comprehension works:
l a b c d e f
↓ ↓ ↓ ↓ ↓ ↓ ↓
In [1]: l = [ [ [ [ [ [ 1 ] ] ] ] ] ]
In [2]: for a in l:
...: for b in a:
...: for c in b:
...: for d in c:
...: for e in d:
...: for f in e:
...: print(float(f))
...:
1.0
In [3]: [float(f)
for a in l
...: for b in a
...: for c in b
...: for d in c
...: for e in d
...: for f in e]
Out[3]: [1.0]
For your case, it will be something like this.
In [4]: new_list = [float(y) for x in l for y in x]
回答 2
>>> l =[['40','20','10','30'],['20','20','20','20','20','30','20'],['30','20','30','50','10','30','20','20','20'],['100','100'],['100','100','100','100','100'],['100','100','100','100']]>>> new_list =[float(x)for xs in l for x in xs]>>> new_list
[40.0,20.0,10.0,30.0,20.0,20.0,20.0,20.0,20.0,30.0,20.0,30.0,20.0,30.0,50.0,10.0,30.0,20.0,20.0,20.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0]
Not sure what your desired output is, but if you’re using list comprehension, the order follows the order of nested loops, which you have backwards. So I got the what I think you want with:
[float(y) for x in l for y in x]
The principle is: use the same order you’d use in writing it out as nested for loops.
回答 4
由于我来这里不晚,但我想分享列表理解的实际工作原理,尤其是嵌套列表理解:
New_list=[[float(y)for x in l]
实际上与:
New_list=[]for x in l:New_list.append(x)
现在嵌套列表理解:
[[float(y)for y in x]for x in l]
与;
new_list=[]for x in l:
sub_list=[]for y in x:
sub_list.append(float(y))
new_list.append(sub_list)print(new_list)
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*10]">>>100000 loops, best of 3:15.2 usec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*10]">>>10000 loops, best of 3:19.6 usec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*100]">>>100000 loops, best of 3:15.2 usec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*100]">>>10000 loops, best of 3:19.6 usec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*1000]">>>1000 loops, best of 3:1.43 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*1000]">>>100 loops, best of 3:1.91 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*10000]">>>100 loops, best of 3:13.6 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*10000]">>>10 loops, best of 3:19.1 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*100000]">>>10 loops, best of 3:164 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*100000]">>>10 loops, best of 3:216 msec per loop
在下一组测试中,我希望将每个列表的元素数量增加到100个。
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*10]">>>10000 loops, best of 3:110 usec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*10]">>>10000 loops, best of 3:151 usec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*100]">>>1000 loops, best of 3:1.11 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*100]">>>1000 loops, best of 3:1.5 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*1000]">>>100 loops, best of 3:11.2 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*1000]">>>100 loops, best of 3:16.7 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*10000]">>>10 loops, best of 3:134 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*10000]">>>10 loops, best of 3:171 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*100000]">>>10 loops, best of 3:1.32 sec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*100000]">>>10 loops, best of 3:1.7 sec per loop
让我们采取一个勇敢的步骤并将列表中的元素数修改为1000
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*10]">>>1000 loops, best of 3:800 usec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*10]">>>1000 loops, best of 3:1.16 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*100]">>>100 loops, best of 3:8.26 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*100]">>>100 loops, best of 3:11.7 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*1000]">>>10 loops, best of 3:83.8 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*1000]">>>10 loops, best of 3:118 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*10000]">>>10 loops, best of 3:868 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*10000]">>>10 loops, best of 3:1.23 sec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*100000]">>>10 loops, best of 3:9.2 sec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*100000]">>>10 loops, best of 3:12.7 sec per loop
I had a similar problem to solve so I came across this question. I did a performance comparison of Andrew Clark’s and narayan’s answer which I would like to share.
Lets do a performance benchmark to see if it is actually true. I used python version 3.5.0 to perform all these tests. In first set of tests I would like to keep elements per list to be 10 and vary number of lists from 10-100,000
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*10]"
>>> 100000 loops, best of 3: 15.2 usec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*10]"
>>> 10000 loops, best of 3: 19.6 usec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*100]"
>>> 100000 loops, best of 3: 15.2 usec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*100]"
>>> 10000 loops, best of 3: 19.6 usec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*1000]"
>>> 1000 loops, best of 3: 1.43 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*1000]"
>>> 100 loops, best of 3: 1.91 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*10000]"
>>> 100 loops, best of 3: 13.6 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*10000]"
>>> 10 loops, best of 3: 19.1 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,10))]*100000]"
>>> 10 loops, best of 3: 164 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,10))]*100000]"
>>> 10 loops, best of 3: 216 msec per loop
In the next set of tests I would like to raise number of elements per lists to 100.
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*10]"
>>> 10000 loops, best of 3: 110 usec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*10]"
>>> 10000 loops, best of 3: 151 usec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*100]"
>>> 1000 loops, best of 3: 1.11 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*100]"
>>> 1000 loops, best of 3: 1.5 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*1000]"
>>> 100 loops, best of 3: 11.2 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*1000]"
>>> 100 loops, best of 3: 16.7 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*10000]"
>>> 10 loops, best of 3: 134 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*10000]"
>>> 10 loops, best of 3: 171 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,100))]*100000]"
>>> 10 loops, best of 3: 1.32 sec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,100))]*100000]"
>>> 10 loops, best of 3: 1.7 sec per loop
Lets take a brave step and modify the number of elements in lists to be 1000
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*10]"
>>> 1000 loops, best of 3: 800 usec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*10]"
>>> 1000 loops, best of 3: 1.16 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*100]"
>>> 100 loops, best of 3: 8.26 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*100]"
>>> 100 loops, best of 3: 11.7 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*1000]"
>>> 10 loops, best of 3: 83.8 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*1000]"
>>> 10 loops, best of 3: 118 msec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*10000]"
>>> 10 loops, best of 3: 868 msec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*10000]"
>>> 10 loops, best of 3: 1.23 sec per loop
>>> python -m timeit "[list(map(float,k)) for k in [list(range(0,1000))]*100000]"
>>> 10 loops, best of 3: 9.2 sec per loop
>>> python -m timeit "[[float(y) for y in x] for x in [list(range(0,1000))]*100000]"
>>> 10 loops, best of 3: 12.7 sec per loop
From these test we can conclude that map has a performance benefit over list comprehension in this case. This is also applicable if you are trying to cast to either int or str. For small number of lists with less elements per list, the difference is negligible. For larger lists with more elements per list one might like to use map instead of list comprehension, but it totally depends on application needs.
However I personally find list comprehension to be more readable and idiomatic than map. It is a de-facto standard in python. Usually people are more proficient and comfortable(specially beginner) in using list comprehension than map.
l =[['40','20','10','30'],['20','20','20','20','20','30','20'],['30','20','30','50','10','30','20','20','20'],['100','100'],['100','100','100','100','100'],['100','100','100','100']]
map(lambda x:map(lambda y:float(y),x),l)
This Problem can be solved without using for loop.Single line code will be sufficient for this. Using Nested Map with lambda function will also works here.
You can do it using this class I just made. With this class you can use the Map object like another dictionary(including json serialization) or with the dot notation. I hope to help you:
class Map(dict):
"""
Example:
m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])
"""
def __init__(self, *args, **kwargs):
super(Map, self).__init__(*args, **kwargs)
for arg in args:
if isinstance(arg, dict):
for k, v in arg.iteritems():
self[k] = v
if kwargs:
for k, v in kwargs.iteritems():
self[k] = v
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(Map, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(Map, self).__delitem__(key)
del self.__dict__[key]
Usage examples:
m = Map({'first_name': 'Eduardo'}, last_name='Pool', age=24, sports=['Soccer'])
# Add new key
m.new_key = 'Hello world!'
# Or
m['new_key'] = 'Hello world!'
print m.new_key
print m['new_key']
# Update values
m.new_key = 'Yay!'
# Or
m['new_key'] = 'Yay!'
# Delete key
del m.new_key
# Or
del m['new_key']
回答 1
我一直将其保存在util文件中。您也可以在自己的类中将其用作混合。
class dotdict(dict):"""dot.notation access to dictionary attributes"""
__getattr__ = dict.get
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
mydict ={'val':'it works'}
nested_dict ={'val':'nested works too'}
mydict = dotdict(mydict)
mydict.val
# 'it works'
mydict.nested = dotdict(nested_dict)
mydict.nested.val
# 'nested works too'
Fabric has a really nice, minimal implementation. Extending that to allow for nested access, we can use a defaultdict, and the result looks something like this:
from collections import defaultdict
class AttributeDict(defaultdict):
def __init__(self):
super(AttributeDict, self).__init__(AttributeDict)
def __getattr__(self, key):
try:
return self[key]
except KeyError:
raise AttributeError(key)
def __setattr__(self, key, value):
self[key] = value
Don’t. Attribute access and indexing are separate things in Python, and you shouldn’t want them to perform the same. Make a class (possibly one made by namedtuple) if you have something that should have accessible attributes and use [] notation to get an item from a dict.
classDictWrap(object):""" Wrap an existing dict, or create a new one, and access with either dot
notation or key lookup.
The attribute _data is reserved and stores the underlying dictionary.
When using the += operator with create=True, the empty nested dict is
replaced with the operand, effectively creating a default dictionary
of mixed types.
args:
d({}): Existing dict to wrap, an empty dict is created by default
create(True): Create an empty, nested dict instead of raising a KeyError
example:
>>>dw = DictWrap({'pp':3})
>>>dw.a.b += 2
>>>dw.a.b += 2
>>>dw.a['c'] += 'Hello'
>>>dw.a['c'] += ' World'
>>>dw.a.d
>>>print dw._data
{'a': {'c': 'Hello World', 'b': 4, 'd': {}}, 'pp': 3}
"""def __init__(self, d=None, create=True):if d isNone:
d ={}
supr = super(DictWrap, self)
supr.__setattr__('_data', d)
supr.__setattr__('__create', create)def __getattr__(self, name):try:
value = self._data[name]exceptKeyError:ifnot super(DictWrap, self).__getattribute__('__create'):raise
value ={}
self._data[name]= value
if hasattr(value,'items'):
create = super(DictWrap, self).__getattribute__('__create')returnDictWrap(value, create)return value
def __setattr__(self, name, value):
self._data[name]= value
def __getitem__(self, key):try:
value = self._data[key]exceptKeyError:ifnot super(DictWrap, self).__getattribute__('__create'):raise
value ={}
self._data[key]= value
if hasattr(value,'items'):
create = super(DictWrap, self).__getattribute__('__create')returnDictWrap(value, create)return value
def __setitem__(self, key, value):
self._data[key]= value
def __iadd__(self, other):if self._data:raiseTypeError("A Nested dict will only be replaced if it's empty")else:return other
Building on Kugel’s answer and taking Mike Graham’s words of caution into consideration, what if we make a wrapper?
class DictWrap(object):
""" Wrap an existing dict, or create a new one, and access with either dot
notation or key lookup.
The attribute _data is reserved and stores the underlying dictionary.
When using the += operator with create=True, the empty nested dict is
replaced with the operand, effectively creating a default dictionary
of mixed types.
args:
d({}): Existing dict to wrap, an empty dict is created by default
create(True): Create an empty, nested dict instead of raising a KeyError
example:
>>>dw = DictWrap({'pp':3})
>>>dw.a.b += 2
>>>dw.a.b += 2
>>>dw.a['c'] += 'Hello'
>>>dw.a['c'] += ' World'
>>>dw.a.d
>>>print dw._data
{'a': {'c': 'Hello World', 'b': 4, 'd': {}}, 'pp': 3}
"""
def __init__(self, d=None, create=True):
if d is None:
d = {}
supr = super(DictWrap, self)
supr.__setattr__('_data', d)
supr.__setattr__('__create', create)
def __getattr__(self, name):
try:
value = self._data[name]
except KeyError:
if not super(DictWrap, self).__getattribute__('__create'):
raise
value = {}
self._data[name] = value
if hasattr(value, 'items'):
create = super(DictWrap, self).__getattribute__('__create')
return DictWrap(value, create)
return value
def __setattr__(self, name, value):
self._data[name] = value
def __getitem__(self, key):
try:
value = self._data[key]
except KeyError:
if not super(DictWrap, self).__getattribute__('__create'):
raise
value = {}
self._data[key] = value
if hasattr(value, 'items'):
create = super(DictWrap, self).__getattribute__('__create')
return DictWrap(value, create)
return value
def __setitem__(self, key, value):
self._data[key] = value
def __iadd__(self, other):
if self._data:
raise TypeError("A Nested dict will only be replaced if it's empty")
else:
return other
def get_var(input_dict, accessor_string):"""Gets data from a dictionary using a dotted accessor-string"""
current_data = input_dict
for chunk in accessor_string.split('.'):
current_data = current_data.get(chunk,{})return current_data
The language itself doesn’t support this, but sometimes this is still a useful requirement. Besides the Bunch recipe, you can also write a little method which can access a dictionary using a dotted string:
def get_var(input_dict, accessor_string):
"""Gets data from a dictionary using a dotted accessor-string"""
current_data = input_dict
for chunk in accessor_string.split('.'):
current_data = current_data.get(chunk, {})
return current_data
class Map(dict):
def __init__(self, *args, **kwargs):
super(Map, self).__init__(*args, **kwargs)
for arg in args:
if isinstance(arg, dict):
for k, v in arg.iteritems():
if isinstance(v, dict):
v = Map(v)
if isinstance(v, list):
self.__convert(v)
self[k] = v
if kwargs:
for k, v in kwargs.iteritems():
if isinstance(v, dict):
v = Map(v)
elif isinstance(v, list):
self.__convert(v)
self[k] = v
def __convert(self, v):
for elem in xrange(0, len(v)):
if isinstance(v[elem], dict):
v[elem] = Map(v[elem])
elif isinstance(v[elem], list):
self.__convert(v[elem])
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(Map, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(Map, self).__delitem__(key)
del self.__dict__[key]
classDictProxy(object):def __init__(self, obj):
self.obj = obj
def __getitem__(self, key):return wrap(self.obj[key])def __getattr__(self, key):try:return wrap(getattr(self.obj, key))exceptAttributeError:try:return self[key]exceptKeyError:raiseAttributeError(key)# you probably also want to proxy important list properties along like# items(), iteritems() and __len__classListProxy(object):def __init__(self, obj):
self.obj = obj
def __getitem__(self, key):return wrap(self.obj[key])# you probably also want to proxy important list properties along like# __iter__ and __len__def wrap(value):if isinstance(value, dict):returnDictProxy(value)if isinstance(value,(tuple, list)):returnListProxy(value)return value
I ended up trying BOTH the AttrDict and the Bunch libraries and found them to be way to slow for my uses. After a friend and I looked into it, we found that the main method for writing these libraries results in the library aggressively recursing through a nested object and making copies of the dictionary object throughout. With this in mind, we made two key changes. 1) We made attributes lazy-loaded 2) instead of creating copies of a dictionary object, we create copies of a light-weight proxy object. This is the final implementation. The performance increase of using this code is incredible. When using AttrDict or Bunch, these two libraries alone consumed 1/2 and 1/3 respectively of my request time(what!?). This code reduced that time to almost nothing(somewhere in the range of 0.5ms). This of course depends on your needs, but if you are using this functionality quite a bit in your code, definitely go with something simple like this.
class DictProxy(object):
def __init__(self, obj):
self.obj = obj
def __getitem__(self, key):
return wrap(self.obj[key])
def __getattr__(self, key):
try:
return wrap(getattr(self.obj, key))
except AttributeError:
try:
return self[key]
except KeyError:
raise AttributeError(key)
# you probably also want to proxy important list properties along like
# items(), iteritems() and __len__
class ListProxy(object):
def __init__(self, obj):
self.obj = obj
def __getitem__(self, key):
return wrap(self.obj[key])
# you probably also want to proxy important list properties along like
# __iter__ and __len__
def wrap(value):
if isinstance(value, dict):
return DictProxy(value)
if isinstance(value, (tuple, list)):
return ListProxy(value)
return value
The other thing to note, is that this implementation is pretty simple and doesn’t implement all of the methods you might need. You’ll need to write those as required on the DictProxy or ListProxy objects.
It allows you to parse JSON into something you can access with.attribute.lookups.like.this.r(), mostly because I hadn’t seen this answer before starting to work on it.
Not a direct answer to the OP’s question, but inspired by and perhaps useful for some.. I’ve created an object-based solution using the internal __dict__ (In no way optimized code)
payload = {
"name": "John",
"location": {
"lat": 53.12312312,
"long": 43.21345112
},
"numbers": [
{
"role": "home",
"number": "070-12345678"
},
{
"role": "office",
"number": "070-12345679"
}
]
}
class Map(object):
"""
Dot style access to object members, access raw values
with an underscore e.g.
class Foo(Map):
def foo(self):
return self.get('foo') + 'bar'
obj = Foo(**{'foo': 'foo'})
obj.foo => 'foobar'
obj._foo => 'foo'
"""
def __init__(self, *args, **kwargs):
for arg in args:
if isinstance(arg, dict):
for k, v in arg.iteritems():
self.__dict__[k] = v
self.__dict__['_' + k] = v
if kwargs:
for k, v in kwargs.iteritems():
self.__dict__[k] = v
self.__dict__['_' + k] = v
def __getattribute__(self, attr):
if hasattr(self, 'get_' + attr):
return object.__getattribute__(self, 'get_' + attr)()
else:
return object.__getattribute__(self, attr)
def get(self, key):
try:
return self.__dict__.get('get_' + key)()
except (AttributeError, TypeError):
return self.__dict__.get(key)
def __repr__(self):
return u"<{name} object>".format(
name=self.__class__.__name__
)
class Number(Map):
def get_role(self):
return self.get('role')
def get_number(self):
return self.get('number')
class Location(Map):
def get_latitude(self):
return self.get('lat') + 1
def get_longitude(self):
return self.get('long') + 1
class Item(Map):
def get_name(self):
return self.get('name') + " Doe"
def get_location(self):
return Location(**self.get('location'))
def get_numbers(self):
return [Number(**n) for n in self.get('numbers')]
# Tests
obj = Item({'foo': 'bar'}, **payload)
assert type(obj) == Item
assert obj._name == "John"
assert obj.name == "John Doe"
assert type(obj.location) == Location
assert obj.location._lat == 53.12312312
assert obj.location._long == 43.21345112
assert obj.location.latitude == 54.12312312
assert obj.location.longitude == 44.21345112
for n in obj.numbers:
assert type(n) == Number
if n.role == 'home':
assert n.number == "070-12345678"
if n.role == 'office':
assert n.number == "070-12345679"
回答 19
获得点访问(而不是数组访问)的一种简单方法是在Python中使用普通对象。像这样:
classYourObject:def __init__(self,*args,**kwargs):for k, v in kwargs.items():
setattr(self, k, v)
This solution is a refinement upon the one offered by epool to address the requirement of the OP to access nested dicts in a consistent manner. The solution by epool did not allow for accessing nested dicts.
class YAMLobj(dict):
def __init__(self, args):
super(YAMLobj, self).__init__(args)
if isinstance(args, dict):
for k, v in args.iteritems():
if not isinstance(v, dict):
self[k] = v
else:
self.__setattr__(k, YAMLobj(v))
def __getattr__(self, attr):
return self.get(attr)
def __setattr__(self, key, value):
self.__setitem__(key, value)
def __setitem__(self, key, value):
super(YAMLobj, self).__setitem__(key, value)
self.__dict__.update({key: value})
def __delattr__(self, item):
self.__delitem__(item)
def __delitem__(self, key):
super(YAMLobj, self).__delitem__(key)
del self.__dict__[key]
With this class, one can now do something like: A.B.C.D.
classDotDict(dict):
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
def __getattr__(self, key):def typer(candidate):if isinstance(candidate, dict):returnDotDict(candidate)if isinstance(candidate, str):# iterable but no need to iterreturn candidate
try:# other iterable are processed as listreturn[typer(item)for item in candidate]exceptTypeError:return candidate
return candidate
return typer(dict.get(self, key))
class DotDict(dict):
__setattr__ = dict.__setitem__
__delattr__ = dict.__delitem__
def __getattr__(self, key):
def typer(candidate):
if isinstance(candidate, dict):
return DotDict(candidate)
if isinstance(candidate, str): # iterable but no need to iter
return candidate
try: # other iterable are processed as list
return [typer(item) for item in candidate]
except TypeError:
return candidate
return candidate
return typer(dict.get(self, key))