The test code in this article uses the data crawled in the previous article. Link to the previous post: Crawler: Getting dynamically loaded data (selenium) (site A). What this article does is fetch the keywords of some of those questions.
1. Multiprocessing syntax
1.1 Syntax 1
import multiprocessing
import time

def func(x):
    print(x * x)

if __name__ == '__main__':
    start = time.time()
    jobs = []
    for i in range(5):
        p = multiprocessing.Process(target=func, args=(i, ))
        jobs.append(p)
        p.start()
    end = time.time()
    print(end - start)
The screenshot is as follows: the elapsed time is printed before the squares. At first I was not sure how to explain this; pointers from more experienced readers are welcome.
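The ordering happens because Process.start() returns immediately: the parent process reaches the time.time() and print lines while the children are still starting up, and nothing in the code waits for them. Below is a minimal sketch (my own variation on the example above, not from the original post) that waits for every child with join() before printing the elapsed time:

import multiprocessing
import time

def func(x):
    print(x * x)

if __name__ == '__main__':
    start = time.time()
    jobs = []
    for i in range(5):
        p = multiprocessing.Process(target=func, args=(i, ))
        jobs.append(p)
        p.start()              # returns immediately; the child runs in the background
    for p in jobs:
        p.join()               # block until each child process has finished
    end = time.time()
    print(end - start)         # now the time is printed after all the squares

With the join() calls added, the timing line comes last and actually measures how long the child processes took.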
1.2 Syntax 2
from multiprocessing import Pool
import time

def func(x, y):
    print(x + y)

if __name__ == '__main__':
    pool = Pool(5)
    start = time.time()
    for i in range(100):
        pool.apply_async(func=func, args=(i, 3))
    pool.close()
    pool.join()
    end = time.time()
    print(end - start)
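Note that apply_async() submits the call and returns right away with an AsyncResult; since func only prints inside the worker, the example never looks at return values. If you want the results back in the parent process, a small sketch (the function name add and the variable names are illustrative, not from the original code) could look like this:

from multiprocessing import Pool

def add(x, y):
    return x + y   # return the value instead of printing it in the worker

if __name__ == '__main__':
    with Pool(5) as pool:
        # apply_async returns an AsyncResult for each submitted call
        results = [pool.apply_async(add, args=(i, 3)) for i in range(100)]
        values = [r.get() for r in results]   # get() blocks until each result is ready
    print(values[:5])   # [3, 4, 5, 6, 7]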
2. Practical test code
import requests
from bs4 import BeautifulSoup
import time
from requests.exceptions import RequestException
from pymongo import MongoClient
from multiprocessing import Pool

client = MongoClient('localhost')
db = client['test_db']


def get_page_keyword(url, word):
    headers = {
        'cookie': '',  # replace with your own cookie
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    try:
        html = requests.get(url, headers=headers, timeout=5)
        html = BeautifulSoup(html.text, "html.parser")
        key_words = html.find("div", {'class': 'QuestionPage'}).find("meta", {'itemprop': 'keywords'}).attrs['content']
        print(key_words)
        with open(r"Women's topic links.txt", 'a') as file:
            file.write(key_words + '\n')
        db[u'' + word + 'keyword'].insert_one({"link": url, "key_words": key_words, "time": time.ctime()})
    except RequestException:
        print('request was aborted')


if __name__ == '__main__':
    input_word = input('Enter the topic of the link file (e.g. women): ')
    f = open(r'women 2021-5-16-3-8.txt')  # path of the file holding the crawled links
    lines = []
    for i in f.readlines():
        lines.append(i.strip())  # strip the trailing newline left by the previous crawl
    f.close()
    # Multiprocessing test
    pool = Pool(2)  # a larger number is faster, but I only have two cores, and too many parallel requests will quickly get the account flagged as abnormal by the site
    start = time.time()
    for link in lines:
        pool.apply_async(func=get_page_keyword, args=(link, input_word))
    pool.close()
    pool.join()
    end = time.time()
    print(end - start)
Screenshot: I did not run it again; this is a screenshot from an earlier run.
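One detail worth noting: the MongoClient above is created at module level, before Pool(2) forks the worker processes, and the pymongo documentation warns that a MongoClient is not fork-safe. A hedged sketch of one way to avoid that, giving each worker its own client via a pool initializer (the names init_worker and save_keyword are my own illustrations, not part of the original script):

from multiprocessing import Pool
from pymongo import MongoClient

db = None   # set separately inside each worker process

def init_worker():
    # runs once per worker process, after the fork, so every process
    # opens its own MongoClient instead of sharing the parent's connection
    global db
    db = MongoClient('localhost')['test_db']

def save_keyword(url, word, key_words):
    # illustrative insert, mirroring the structure used in get_page_keyword
    db[word + 'keyword'].insert_one({"link": url, "key_words": key_words})

if __name__ == '__main__':
    pool = Pool(2, initializer=init_worker)
    # ... submit get_page_keyword-style jobs here, as in the main script ...
    pool.close()
    pool.join()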