puppeteer 官方文档里的一段代码想改一下 https://pptr.dev/#?product=Puppeteer&version=v7.0.1&show=api-pagewaitforselectorselector-options
/* Example script: launches a browser, visits three URLs in order, and logs the URL held in currentURL when the first img element is found. */ const puppeteer = require('puppeteer'); (async () => { const browser = await puppeteer.launch(); const page = await browser.newPage(); /* Shared between the loop below and the .then callback. */ let currentURL; /* Not awaited: the wait is started before any navigation and its callback reads whatever currentURL holds at the time it runs. */ page .waitForSelector('img') .then(() => console.log('First URL with image: ' + currentURL)); /* The loop variable is the outer currentURL, so it is reassigned before each goto. */ for (currentURL of ['https://example.com', 'https://google.com', 'https://bbc.com']) { await page.goto(currentURL); } await browser.close(); })();
简单的 print 第一个有 img 的网站。想改成
const puppeteer = require('puppeteer');

/**
 * Visits a fixed list of URLs in order and logs the URL held in `currentURL`
 * at the moment the first `img` element is detected on the page.
 *
 * @param {object} page - Puppeteer page; `waitForSelector` and `goto` are called on it.
 * @returns {Promise<void>} Resolves once every URL in the list has been visited.
 */
const get_available_item = async (page) => {
  let currentURL;
  const urls = [
    'https://example.com',
    'https://example.com',
    'https://baidu.com',
  ];

  // Intentionally not awaited: the wait has to be pending while the loop
  // below navigates. The .catch keeps a failed wait (e.g. no img ever shows
  // up) from surfacing as an unhandled promise rejection.
  page
    .waitForSelector('img')
    .then(() => {
      console.log('First URL with image: ' + currentURL);
    })
    .catch((err) => {
      console.error('waitForSelector("img") did not succeed:', err);
    });

  // The loop variable is the outer `currentURL`, so the callback above sees
  // the URL that is currently being loaded.
  for (currentURL of urls) {
    await page.goto(currentURL, { waitUntil: 'load' });
  }
};

/**
 * Launches Firefox, runs `get_available_item` on a fresh page, and always
 * closes the browser afterwards.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const browser = await puppeteer.launch({
    headless: false,
    product: 'firefox',
    defaultViewport: { width: 1366, height: 768 },
  });

  try {
    const page = await browser.newPage();
    await get_available_item(page);
  } finally {
    // Close the browser even when a navigation throws, so no browser
    // process is left behind.
    await browser.close();
  }
};

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
想让 get_available_item 返回 currentURL,试了一种用 callback 返回的方法:
const puppeteer = require('puppeteer');

/**
 * Visits a fixed list of URLs in order and reports, through `callback`, the
 * URL held in `currentURL` at the moment the first `img` element is detected.
 *
 * @param {object} page - Puppeteer page; `waitForSelector` and `goto` are called on it.
 * @param {(url: string) => void} callback - Called with the current URL once
 *   the `img` wait succeeds; not called if the wait fails.
 * @returns {Promise<void>} Resolves once every URL in the list has been visited.
 */
const get_available_item = async (page, callback) => {
  let currentURL;
  const urls = [
    'https://example.com',
    'https://example.com',
    'https://baidu.com',
  ];

  // Intentionally not awaited: the wait has to be pending while the loop
  // below navigates. The .catch keeps a failed wait (or an error thrown by
  // the callback) from surfacing as an unhandled promise rejection.
  page
    .waitForSelector('img')
    .then(() => {
      callback(currentURL);
    })
    .catch((err) => {
      console.error('waitForSelector("img") did not succeed:', err);
    });

  // The loop variable is the outer `currentURL`, so the callback above sees
  // the URL that is currently being loaded.
  for (currentURL of urls) {
    await page.goto(currentURL, { waitUntil: 'load' });
  }
};

/**
 * Launches Firefox, collects the URL reported by `get_available_item`,
 * prints it, and always closes the browser afterwards.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  let url;
  const browser = await puppeteer.launch({
    headless: false,
    product: 'firefox',
    defaultViewport: { width: 1366, height: 768 },
  });

  try {
    const page = await browser.newPage();
    await get_available_item(page, (res) => {
      url = res;
    });
    console.log(`result is ${url}`);
  } finally {
    // Close the browser even when a navigation throws, so no browser
    // process is left behind.
    await browser.close();
  }
};

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
有比 callback 更简洁的方法么?
![]() | 1 cyrbuzz 2021-02-09 11:29:39 +08:00 试试用订阅发布模型。 window.addEventListener('getWithImageUrl', (_url) => url = _url); window.dispatchEvent(new Event('getWithImage')) |
2 azcvcza 2021-02-09 11:32:04 +08:00 能在你想返回的地方加点注释吗。。 |
![]() | 3 musi 2021-02-09 11:45:49 +08:00 既然已经用 async 了,把 callback 改成 promise 后用 await |
4 lzdyes 2021-02-09 12:10:51 +08:00 你都用 async await 了还不会包装个 Promise 吗,返回值 Promise<string> |
5 lzdyes 2021-02-09 12:27:33 +08:00 const get_available_item = async (page: string):Promise<string> => { return new Promise<string>( ( reject) =>{ // ... reject(currentURL) }) } const currentURL = await get_available_item(page) |
6 oxromantic 2021-02-09 12:47:20 +08:00 @lzdyes 为什么你第一个参数起名叫 reject ???? |
7 azcvcza 2021-02-09 13:48:12 +08:00 @oxromantic 实际上都是按参数位置调的,叫 reject 一样按 resolve 用,就是迷惑了点 |
8 lzdyes 2021-02-09 14:12:27 +08:00 啊哈,脑抽写错了 不过不影响 lz 自己改成 resolve 吧 |
![]() | 9 lbfeng OP @cyrbuzz 这个真没想到。 @lzdyes 你这个可能不 work 。new Promise 里的 function 一般情况下是 sync function,但因为有 await 必须用 async 。 ```Javascript const operation1 = Promise.resolve(5) const operation2 = Promise.resolve(15) const publishResult = () => Promise.reject(`Can't publish`) let p = new Promise((resolve, reject) => { (async () => { try { const op1 = await operation1; const op2 = await operation2; if (op2 == null) { throw new Error('Validation error'); } const res = op1 + op2; const result = await publishResult(res); resolve(result) } catch (err) { reject(err) } })() }); (async () => { await p; })().catch(e => console.log("Caught: " + e)); ``` 这个例子 work,但有 anti pattern |