>>> def create_generator():
...     mylist = range(3)
...     for i in mylist:
...         yield i*i
...
>>> mygenerator = create_generator() # create a generator
>>> print(mygenerator) # mygenerator is an object!
<generator object create_generator at 0xb7555c34>
>>> for i in mygenerator:
...     print(i)
0
1
4
第一次 for 调用从您的函数创建的生成器对象时,它将从头开始运行您的函数中的代码,直到命中yield,然后它将返回循环的第一个值。然后,每个后续调用将运行您在函数中编写的循环的另一次迭代并返回下一个值。这将一直持续到生成器被认为是空的为止。
4. 控制生成器耗尽的一个例子
>>> class Bank(): # Let's create a bank, building ATMs
...     crisis = False
...     def create_atm(self):
...         while not self.crisis:
...             yield "$100"
>>> hsbc = Bank() # When everything's ok the ATM gives you as much as you want
>>> corner_street_atm = hsbc.create_atm()
>>> print(corner_street_atm.next())
$100
>>> print(corner_street_atm.next())
$100
>>> print([corner_street_atm.next() for cash in range(5)])
['$100', '$100', '$100', '$100', '$100']
>>> hsbc.crisis = True # Crisis is coming, no more money!
>>> print(corner_street_atm.next())
<type 'exceptions.StopIteration'>
>>> wall_street_atm = hsbc.create_atm() # It's even true for new ATMs
>>> print(wall_street_atm.next())
<type 'exceptions.StopIteration'>
>>> hsbc.crisis = False # The trouble is, even post-crisis the ATM remains empty
>>> print(corner_street_atm.next())
<type 'exceptions.StopIteration'>
>>> brand_new_atm = hsbc.create_atm() # Build a new one to get back in business
>>> for cash in brand_new_atm:
...     print(cash)
$100
$100
$100
$100
$100
$100
$100
$100
$100
...
你可以在代码中检查 Python 的版本,以确保你的用户没有在不兼容的版本中运行脚本。检查方式如下:
if not sys.version_info > (2, 7):
    # berate your user for running a 10 year
    # python version
elif not sys.version_info >= (3, 5):
    # Kindly tell your user (s)he needs to upgrade
    # because you're using 3.5 features
完整的命令列表,请点击此处查看(https://ipython.readthedocs.io/en/stable/interactive/magics.html)。还有一个非常实用的功能:引用上一个命令的输出。In 和 Out 是实际的对象。你可以通过 Out[3] 的形式使用第三个命令的输出。IPython 的安装命令如下:pip3 install ipython
4.Python 编程技巧 – 列表推导式
你可以利用列表推导式,避免使用循环填充列表时的繁琐。列表推导式的基本语法如下:
[ expression for item in list if conditional ]
举一个基本的例子:用一组有序数字填充一个列表:
mylist = [i for i in range(10)]
print(mylist)
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
由于可以使用表达式,所以你也可以做一些算术运算:
squares = [x**2 for x in range(10)]
print(squares)
# [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
甚至可以调用外部函数:
def some_function(a):
    return (a + 5) / 2

my_formula = [some_function(i) for i in range(10)]
print(my_formula)
# [2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0]
最后,你还可以使用 ‘if’ 来过滤列表。在如下示例中,我们只保留能被2整除的数字:filtered = [i for i in range(20) if i%2==0] print(filtered) # [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
有些人非常喜欢表情符,而有些人则深恶痛绝。我在此郑重声明:在分析社交媒体数据时,表情符可以派上大用场。首先,我们来安装表情符模块:
pip3 install emoji
安装完成后,你可以按照如下方式使用:
import emoji
result = emoji.emojize('Python is :thumbs_up:')
print(result)
# 'Python is 👍'

# You can also reverse this:
result = emoji.demojize('Python is 👍')
print(result)
# 'Python is :thumbs_up:'
# Convert a string representation of
# a number into a list of ints.
list_of_ints = list(map(int, "1234567"))
print(list_of_ints)
# [1, 2, 3, 4, 5, 6, 7]

# And since a string can be treated like a
# list of letters, you can also get the
# unique letters from a string this way:
print(set("aaabbbcccdddeeefff"))
# {'a', 'b', 'c', 'd', 'e', 'f'}
虽然你可以用三重引号将代码中的多行字符串括起来,但是这种做法并不理想。所有放在三重引号之间的内容都会成为字符串,包括代码的格式,如下所示。我更喜欢另一种方法,这种方法不仅可以将多行字符串连接在一起,而且还可以保证代码的整洁。唯一的缺点是你需要明确指定换行符。
s1 = """Multi line strings can be put
        between triple quotes. It's not ideal
        when formatting your code though"""

print(s1)
# Multi line strings can be put
#         between triple quotes. It's not ideal
#         when formatting your code though

s2 = ("You can also concatenate multiple\n" +
      "strings this way, but you'll have to\n"
      "explicitly put in the newlines")

print(s2)
# You can also concatenate multiple
# strings this way, but you'll have to
# explicitly put in the newlines
24. Python 编程技巧 – 条件赋值中的三元运算符
这种方法可以让代码更简洁,同时又可以保证代码的可读性:
[on_true] if [expression] else [on_false]
示例如下:
x = "Success!" if (y == 2) else "Failed!"
print(Fore.RED + 'some red text')
print(Back.GREEN + 'and with a green background')
print(Style.DIM + 'and in dim text')
print(Style.RESET_ALL)
print('back to normal now')
python3 event.py
function_1 called
function_2 called
function_3 called
function_1 called
function_2 called
function_3 called
function_1 called
function_2 called
function_3 called
>>> df = pd.DataFrame([
...     [1, 2, 3, 4],
...     [5, 6, 7, 8],
...     [9, 10, 11, 12]
... ]).set_index([0, 1]).rename_axis(['a', 'b'])
>>> df.columns = pd.MultiIndex.from_tuples([
...     ('c', 'e'), ('d', 'f')
... ], names=['level_1', 'level_2'])
>>> df
level_1   c   d
level_2   e   f
a b
1 2      3   4
5 6      7   8
9 10    11  12
>>> df.droplevel('a')
level_1   c   d
level_2   e   f
b
2        3   4
6        7   8
10      11  12
>>> df.droplevel('level_2', axis=1)
level_1   c   d
a b
1 2      3   4
5 6      7   8
9 10    11  12
# Recommended version; reported run time: 0.33 s
class DemoClass:
    """Minimal value holder that exposes its state as a plain attribute."""

    def __init__(self, value: int):
        # Plain public attribute: avoids unnecessary property accessors.
        self.value = value
def main():
    """Create 1,000,000 DemoClass instances, reading and writing `value` directly."""
    size = 1000000
    for i in range(size):
        demo_instance = DemoClass(size)
        value = demo_instance.value  # direct attribute read
        demo_instance.value = i  # direct attribute write


if __name__ == "__main__":
    main()
4. 避免数据复制
4.1 避免无意义的数据复制
# Not recommended; reported run time: 6.5 s
def main(size: int = 10000) -> None:
    """Build a list of squares `size` times, going through a redundant copy.

    Args:
        size: Both the number of repetitions and the length of each range.
            Defaults to 10000, the value the snippet originally hard-coded.
    """
    for _ in range(size):
        value = range(size)
        # Deliberately redundant element-by-element copy: this is the
        # anti-pattern the snippet illustrates, so it is kept as-is.
        value_list = [x for x in value]
        square_list = [x * x for x in value_list]


if __name__ == "__main__":
    main()
上面的代码中value_list完全没有必要,这会创建不必要的数据结构或复制。
# Recommended version; reported run time: 4.8 s
def main(size: int = 10000) -> None:
    """Build a list of squares `size` times directly from the range.

    Args:
        size: Both the number of repetitions and the length of each range.
            Defaults to 10000, the value the snippet originally hard-coded.
    """
    for _ in range(size):
        value = range(size)
        square_list = [x * x for x in value]  # avoid a pointless copy
def main():
    """Call concatString 10000 times on a list of single ASCII letters."""
    # NOTE(review): relies on `import string` and on a `concatString` defined
    # earlier in this snippet; neither is visible from here -- confirm.
    string_list = list(string.ascii_letters * 100)
    for _ in range(10000):
        result = concatString(string_list)


if __name__ == "__main__":
    main()
5. 利用if条件的短路特性
# Not recommended; reported run time: 0.05 s
from typing import List


def concatString(string_list: List[str]) -> str:
    """Concatenate the entries of `string_list` that are known abbreviations.

    Args:
        string_list: Tokens to scan.

    Returns:
        The matching tokens joined together in their original order
        (empty string when nothing matches).
    """
    abbreviations = {'cf.', 'e.g.', 'ex.', 'etc.', 'flg.', 'i.e.', 'Mr.', 'vs.'}
    result = ''
    for str_i in string_list:
        # Every token goes through the set lookup; no cheap pre-filter is
        # applied first (that is what this variant demonstrates).
        if str_i in abbreviations:
            result += str_i
    return result
def main():
    """Run concatString 10000 times on a fixed eight-token sentence."""
    for _ in range(10000):
        string_list = ['Mr.', 'Hat', 'is', 'Chasing', 'the', 'black', 'cat', '.']
        result = concatString(string_list)


if __name__ == "__main__":
    main()
if 条件的短路特性是指:对于 if a and b 这样的语句,当 a 为 False 时将直接返回,不再计算 b;对于 if a or b 这样的语句,当 a 为 True 时将直接返回,不再计算 b。因此,为了节约运行时间,对于 or 语句,应该将值为 True 可能性比较高的条件写在 or 前面;对于 and 语句,则应该将值为 False 可能性比较高的条件写在 and 前面。
# Recommended version; reported run time: 0.03 s
from typing import List


def concatString(string_list: List[str]) -> str:
    """Concatenate the entries of `string_list` that are known abbreviations.

    Args:
        string_list: Tokens to scan. Empty strings are allowed and skipped.

    Returns:
        The matching tokens joined together in their original order
        (empty string when nothing matches).
    """
    abbreviations = {'cf.', 'e.g.', 'ex.', 'etc.', 'flg.', 'i.e.', 'Mr.', 'vs.'}
    result = ''
    for str_i in string_list:
        # Short-circuit: the cheap trailing-dot test runs first, so the set
        # lookup only happens for tokens that can possibly match.
        # endswith() is used instead of str_i[-1] so '' does not raise.
        if str_i.endswith('.') and str_i in abbreviations:
            result += str_i
    return result
def main():
    """Run concatString 10000 times on a fixed eight-token sentence."""
    for _ in range(10000):
        string_list = ['Mr.', 'Hat', 'is', 'Chasing', 'the', 'black', 'cat', '.']
        result = concatString(string_list)


if __name__ == "__main__":
    main()
6. 循环优化
6.1 用for循环代替while循环
# Not recommended; reported run time: 6.7 s
def computeSum(size: int) -> int:
    """Return 0 + 1 + ... + (size - 1) using an explicit while loop.

    Returns 0 when `size` is zero or negative, since the loop body never runs.
    """
    sum_ = 0
    i = 0
    while i < size:
        sum_ += i
        i += 1
    return sum_
def main():
    """Call computeSum(10000) ten thousand times."""
    size = 10000
    for _ in range(size):
        sum_ = computeSum(size)


if __name__ == "__main__":
    main()
Python 的for循环比while循环快不少。
# Recommended version; reported run time: 4.3 s
def computeSum(size: int) -> int:
    """Return 0 + 1 + ... + (size - 1) using a for loop over range.

    Returns 0 when `size` is zero or negative, since the range is empty.
    """
    sum_ = 0
    for i in range(size):  # for loop instead of a while loop
        sum_ += i
    return sum_
def main():
    """Call computeSum(10000) ten thousand times."""
    size = 10000
    for _ in range(size):
        sum_ = computeSum(size)


if __name__ == "__main__":
    main()
6.2 使用隐式for循环代替显式for循环
针对上面的例子,更进一步可以用隐式for循环来替代显式for循环
# Recommended version; reported run time: 1.7 s
def computeSum(size: int) -> int:
    """Return 0 + 1 + ... + (size - 1) via the built-in sum over a range.

    Returns 0 when `size` is zero or negative, since the range is empty.
    """
    return sum(range(size))  # implicit loop instead of an explicit for loop
def main():
    """Call computeSum(10000) ten thousand times."""
    size = 10000
    for _ in range(size):
        # Named sum_ (not sum) so the built-in is not shadowed.
        sum_ = computeSum(size)


if __name__ == "__main__":
    main()
6.3 减少内层for循环的计算
# Not recommended; reported run time: 12.8 s
import math


def main(size: int = 10000) -> None:
    """Compute sqrt(x) + sqrt(y) for every pair (x, y) in range(size) squared.

    Args:
        size: Extent of both loops. Defaults to 10000, the value the snippet
            originally hard-coded.
    """
    sqrt = math.sqrt
    for x in range(size):
        for y in range(size):
            # sqrt(x) is recomputed on every inner iteration although it only
            # depends on the outer variable -- the cost this variant shows.
            z = sqrt(x) + sqrt(y)


if __name__ == "__main__":
    main()
上面的代码中sqrt(x)位于内层for循环,每次内层循环迭代都会重新计算一次,增加了时间开销。
# Recommended version; reported run time: 7.0 s
import math


def main(size: int = 10000) -> None:
    """Compute sqrt(x) + sqrt(y) for every pair (x, y) in range(size) squared.

    Args:
        size: Extent of both loops. Defaults to 10000, the value the snippet
            originally hard-coded.
    """
    sqrt = math.sqrt
    for x in range(size):
        sqrt_x = sqrt(x)  # hoisted: less work inside the inner for loop
        for y in range(size):
            z = sqrt_x + sqrt(y)
def zip(*iterables):
    """Yield tuples that pair up the items of each iterable in lockstep.

    Iteration stops as soon as any one of the iterables is exhausted; with
    no arguments nothing is yielded. This definition shadows the built-in
    of the same name in the module where it is defined.
    """
    # zip('ABCD', 'xy') --> Ax By
    sentinel = object()  # unique marker that no input can produce
    iterators = [iter(it) for it in iterables]
    while iterators:  # empty when called without arguments
        result = []
        for it in iterators:
            elem = next(it, sentinel)
            if elem is sentinel:
                # One input ran out: stop without emitting a partial tuple.
                return
            result.append(elem)
        yield tuple(result)