多线程 多进程 协程
进程:运行中的程序。
线程:被 CPU 调度的执行过程,是操作系统能够进行运算调度的最小单位;
线程包含在进程之中,是进程中的实际运作单位。
from threading import Thread
#创建任务
def func(name):
    """Task executed by each thread: print ``name`` with a counter 0..99.

    Args:
        name: Label printed in front of every counter value, so the output
            of different threads can be told apart.
    """
    for i in range(100):
        print(name, i)
if __name__ == '__main__':
    # Create one thread per task. ``args`` must be a tuple, hence the
    # trailing comma after the single argument.
    t1 = Thread(target=func, args=("一一一",))
    t2 = Thread(target=func, args=("二二二",))
    # start() launches the threads; the main thread continues immediately,
    # so the line below is printed while the two tasks are still running.
    t1.start()
    t2.start()
    print("我是主线程")
    # At this point: 1 main thread + 2 worker threads.
面向对象方法
from threading import Thread
class MyThread(Thread):
    """Thread subclass whose work is defined by overriding ``run``.

    Args:
        name: Label for the thread; it is stored through ``Thread.name`` and
            printed in front of every counter value.
    """

    def __init__(self, name):
        # The base class must be initialised before any attribute is set.
        super().__init__()
        self.name = name

    def run(self):
        """Print ``self.name`` with a counter 0..99 (invoked by ``start()``)."""
        for i in range(100):
            print(self.name, i)
if __name__ == '__main__':
    # Each instance is its own thread; start() runs MyThread.run() in it.
    t1 = MyThread("一一一")
    t2 = MyThread("二二二")
    t1.start()
    t2.start()
线程池
from concurrent.futures import ThreadPoolExecutor
def func(name):
    """Pool task: print ``name`` with a counter 0..9.

    Args:
        name: Label printed in front of every counter value.
    """
    for i in range(10):
        print(name, i)
if __name__ == '__main__':
    # A pool of 10 worker threads processes 100 submitted tasks; leaving the
    # ``with`` block waits for all of them to finish.
    with ThreadPoolExecutor(10) as t:
        for i in range(100):
            t.submit(func, f"num{i}")
返回值的使用(回调函数)
from concurrent.futures import ThreadPoolExecutor
def func(name):
    """Pool task: print ``name`` with a counter 0..9, then return ``name``.

    Args:
        name: Label printed in front of every counter value.

    Returns:
        The ``name`` argument unchanged; it becomes the result of the Future.
    """
    for i in range(10):
        print(name, i)
    return name
def fn(res):
    """Done-callback: print the value produced by a finished task.

    Args:
        res: The completed Future handed over by ``add_done_callback``.
    """
    print(res.result())
if __name__ == '__main__':
    with ThreadPoolExecutor(10) as t:
        for i in range(100):
            # The callback fires as soon as a task returns, so the order in
            # which results are printed is not deterministic.
            t.submit(func, f"num{i}").add_done_callback(fn)
map 的返回值:结果顺序与传入参数的顺序一致
from concurrent.futures import ThreadPoolExecutor
def func(name):
    """Pool task: print ``name`` with a counter 0..9, then return ``name``.

    Args:
        name: Label printed in front of every counter value.

    Returns:
        The ``name`` argument unchanged.
    """
    for i in range(10):
        print(name, i)
    return name
def fn(res):
    """Done-callback: print the value produced by a finished task.

    Args:
        res: The completed Future passed in by ``add_done_callback``; its
            ``result()`` is the task's return value.
    """
    print(res.result())
if __name__ == '__main__':
    with ThreadPoolExecutor(10) as t:
        for i in range(100):
            # fn is bound as the done-callback: it receives the Future and
            # reads the task's return value through result(). Callbacks run
            # as tasks finish, so the output order is not deterministic.
            t.submit(func, f"num{i}").add_done_callback(fn)
if __name__ == '__main__':
    with ThreadPoolExecutor(10) as t:
        # map() returns a lazy iterator that yields the results in the same
        # order as the inputs, regardless of which task finishes first.
        result = t.map(func, ["111", "222", "333"])
        for r in result:
            print(r)
线程池案例
#北京新发地
import requests
from lxml import etree
from concurrent.futures import ThreadPoolExecutor#线程池
# Shared output file: opened once at import time and written by download().
f = open("线程池案例.csv", mode="w", encoding="utf-8")
def download(url):
    """Fetch one listing page and append its table rows to the shared CSV.

    Every ``<tr>`` of the ``hq_table`` table becomes one output line made of
    the row's ``<td>`` texts joined by commas.

    Args:
        url: Address of the page to fetch.
    """
    resp = requests.get(url)
    tree = etree.HTML(resp.text)
    tr_list = tree.xpath("//table[@class='hq_table']/tr")
    # This function runs on several pool threads that share the module-level
    # file ``f``. Writing a row and its newline in separate calls lets output
    # from another thread land in between, so the whole page is assembled
    # first and handed over in a single write().
    # NOTE(review): text files are not documented as thread-safe; guard the
    # write with a lock if strict ordering guarantees are required.
    page_rows = [",".join(tr.xpath("./td/text()")) + "\n" for tr in tr_list]
    f.write("".join(page_rows))
if __name__ == '__main__':
    try:
        # Pool of 10 worker threads; leaving the ``with`` block waits until
        # every submitted page has been processed.
        with ThreadPoolExecutor(10) as t:
            for i in range(1, 16964):
                url = f"http...{i}.shtml"
                # Do not call download(url) directly here: that would run it
                # in the main thread instead of handing it to the pool.
                t.submit(download, url)
    finally:
        # The shared output file was never closed; close it once all workers
        # are done so buffered rows are flushed to disk.
        f.close()