• 从使用场景了解puppeteer
  • Artiely
  • #puppeteer
  • 2021-01-03
  • 1076
  • 6 min read
  • loading...

Puppeteer 是什么

英 [ˌpʌpɪˈtɪər]
美 [ˌpʌpɪˈtɪr]

n. 操纵木偶的人;操纵傀儡
vt. 操纵

  • 浏览器自动化库
  • Puppeteer = Node.js + Chrome
  • 在页面中执行 JavaScript

官网

Puppeteer

安装

只需安装 NPM 包即可开始

npm i puppeteer
复制成功
1

然后

// Load Puppeteer. The binding name must match the identifier used below,
// otherwise `puppeteer.launch()` throws a ReferenceError.
const puppeteer = require("puppeteer");
(async () => {
  // ....
  // Launch a browser and open a new page in it.
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
})();
复制成功
1
2
3
4
5
6

从一些场景中了解 Puppeteer 的应用

  1. 某购物网站某商品的价格变化趋势或某新闻网站的新闻






 
 




// Scenario: read the text of the `.price` element from a page and print it.
const puppeteer = require("puppeteer");
(async () => {
  // ....
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // Fetch the price
    await page.goto("https://...");
    // $eval runs the callback against the first element matching the selector.
    const price = await page.$eval(".price", (div) => div.textContent);
    console.log(price);
  } finally {
    // Close the browser even when navigation or the lookup fails.
    await browser.close();
  }
})();
复制成功
1
2
3
4
5
6
7
8
9
10
11
  1. 处理异步和对网站进行测试 可以结合任何测试库




 
 





// Clicks `.button`, waits for the `/pay` response, then expects `.success` to exist.
// The callback must be async because it uses `await`.
it('should pay', async () => {
  const page = await browser.newPage();
  await page.goto("https://...");
  // Async handling: waitForX ...
  await page.waitForSelector('.button');
  // Start waiting for the response before clicking so it cannot be missed.
  const response = page.waitForResponse((res) => res.url().endsWith('/pay'));
  await page.click('.button');
  await response;
  // page.$ resolves to null when nothing matches, which fails the assertion.
  assert(await page.$('.success'));
});
复制成功
1
2
3
4
5
6
7
8
9
10
  1. 测试在移动设备的表现


 



// Emulates the 'iPhone 8' device descriptor before running mobile checks.
// The callback must be async because it uses `await`.
it('should work on mobile', async () => {
  const page = await browser.newPage();
  await page.emulate(puppeteer.devices['iPhone 8']);
  // ...
});
复制成功
1
2
3
4
5
  1. 网站的离线表现




 




// Switches the page to offline mode and expects `.offline-alert` to appear.
// The callback must be async because it uses `await`.
it('should notify user when goes offline', async () => {
  const page = await browser.newPage();
  await page.emulate(puppeteer.devices['iPhone 8']);
  await page.goto("https://...");
  await page.setOfflineMode(true);
  const alert = await page.waitForSelector('.offline-alert');
  expect(alert).toBeTruthy();
});
复制成功
1
2
3
4
5
6
7
8
  1. 检测注册的 service worker




 
 
 
 
 
 
 



// Waits for a service-worker target, then checks from inside the worker
// that a given URL is present in the Cache Storage.
// The callback must be async because it uses `await`.
it('should register service worker', async () => {
  const page = await browser.newPage();
  await page.emulate(puppeteer.devices['iPhone 8']);
  await page.goto("https://...");
  // Target type for service workers is the literal string 'service_worker'.
  const swTarget = await page.waitForTarget((target) => {
    return target.type() === 'service_worker';
  });
  const sw = await swTarget.worker();
  // Runs in the worker context; `caches` is the worker's CacheStorage.
  const isCached = await sw.evaluate(async () => {
    return !!(await caches.match('https://artiely.gitee.io/x.png'));
  });
  expect(isCached).toBe(true);
});
复制成功
1
2
3
4
5
6
7
8
9
10
11
12
13

进阶

// Reload the page once the service worker is ready and report, per request URL,
// whether the response was served by the service worker.
await page.goto('https://...');
await page.evaluate('navigator.serviceWorker.ready');

// Keyed by URL, so a later request to the same URL replaces the earlier one.
const requests = new Map();
page.on('request', (req) => {
  requests.set(req.url(), req);
});

await page.reload({ waitUntil: 'networkidle0' });

for (const [url, req] of requests) {
  // response() is null when no response was received for the request.
  const response = req.response();
  const swResp = response !== null && response.fromServiceWorker();
  console.log(url, swResp ? '√' : '×');
}
复制成功
1
2
3
4
5
6
7
8
9
10
11
12
13
14
  1. 模拟位置



 
 
 
 




// Overrides the page's geolocation before navigating.
// The callback must be async because it uses `await`.
it('should $ in US', async () => {
  const page = await browser.newPage();
  await page.emulate(puppeteer.devices['iPhone 8']);
  // Latitude must lie in [-90, 90] and longitude in [-180, 180]; the previous
  // latitude of 140 was out of range. These coordinates are inside the US.
  await page.setGeolocation({
    latitude: 40.00,
    longitude: -100.00,
  });
  await page.goto("https://...");
  // ...
});
复制成功
1
2
3
4
5
6
7
8
9
10
  1. 拦截并修改网络请求
// Intercept requests: replace image responses, cancel the payment call,
// and let everything else through.
await page.setRequestInterception(true);
page.on('request', (request) => {
  // An intercepted request must be resolved exactly once (respond, abort or
  // continue), so the cases are a single if / else-if chain.
  if (request.resourceType() === 'image') {
    // Replace the image
    request.respond({ body: randomCatImage() });
  } else if (request.url() === 'https://x/pay') {
    // Cancel the request
    request.abort();
  } else {
    request.continue();
  }
});
复制成功
1
2
3
4
5
6
7
8
9
10
11
12
13
  1. 模拟输入和事件
// Type a query into the search box and submit it with the Enter key.
await page.goto("https://pptr.dev/");
const input = await page.waitForSelector("[type=search]");
await input.type("javascript");
await page.keyboard.press("Enter");
复制成功
1
2
3
4
  1. 页面性能指标
// Print the metrics object reported by page.metrics().
console.log(await page.metrics());
复制成功
1
2
// Record a trace of the page load into ./trace.json.
await page.tracing.start({ path: './trace.json' });
await page.goto('https://pptr.dev');
await page.waitForSelector('select');
// ...
await page.tracing.stop();
复制成功
1
2
3
4
5
  1. 服务端渲染
<div id="container"></div>
<script>
  // Build an <li> per post and write the list into `container`.
  // NOTE(review): post.title / post.content are inserted as raw HTML;
  // escape them if the data can come from untrusted users.
  function render(posts, container) {
    const html = posts.reduce((html, post) => {
      return `${html}
        <li>
        <h2>${post.title}</h2>
        <p>${post.content}</p>
        </li>
        `;
    }, "");
    container.innerHTML = `<ul>${html}</ul>`;
  }
  // Fetch the posts as JSON and render them on the client.
  fetch("/post")
    .then((res) => res.json())
    .then((res) => render(res, document.getElementById("container")));
</script>
复制成功
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

ssr.js

import puppeteer from "puppeteer";
/**
 * Render a URL in headless Chrome and return the resulting page markup.
 *
 * @param {string} url - Address of the page to render.
 * @returns {Promise<string>} The HTML returned by `page.content()`.
 */
export async function ssr(url) {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle0" });
    return await page.content();
  } finally {
    // Always release the browser, even when navigation fails.
    await browser.close();
  }
}
复制成功
1
2
3
4
5
6
7
8
9

server.js





 
 
 





import express from "express";
import { ssr } from "./ssr.js";
const app = express();
// Serve "/" by rendering this server's own /index.html through ssr().
app.get("/", async (req, res, next) => {
  try {
    const origin = `${req.protocol}://${req.get("host")}`;
    const html = await ssr(`${origin}/index.html`);
    return res.status(200).send(html);
  } catch (err) {
    // Express 4 does not catch rejections from async handlers; forward the
    // error so the request does not hang.
    return next(err);
  }
});
app.listen(8080, () => {
  console.log("server start");
});
复制成功
1
2
3
4
5
6
7
8
9
10
11
  • 优化更快的直达
    ssr.js





 

 




import puppeteer from "puppeteer";
/**
 * Faster variant: return the markup as soon as the `#post` element exists
 * instead of waiting for the network to go idle.
 *
 * @param {string} url - Address of the page to render.
 * @returns {Promise<string>} The HTML returned by `page.content()`.
 */
export async function ssr(url) {
  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    // "domcontentloaded" replaces the slower "networkidle0" wait.
    await page.goto(url, { waitUntil: "domcontentloaded" });
    // The content is ready once the rendered list is in the DOM.
    await page.waitForSelector("#post");
    // Capture the markup only after the list has been rendered.
    return await page.content();
  } finally {
    // Always release the browser, even when navigation or the wait fails.
    await browser.close();
  }
}
复制成功
1
2
3
4
5
6
7
8
9
10
11
  • 优化 2 添加缓存
    ssr.js
// In-memory cache of rendered results, keyed by URL.
const RENDER_CACHE = new Map();
/**
 * Sketch of ssr() with caching; the rendering step itself is elided below.
 * Returns the cached value when the URL has already been stored.
 */
async function ssr(url) {
  // Cache hit: skip rendering entirely.
  if (RENDER_CACHE.has(url)) return RENDER_CACHE.get(url);
  // ...
  // `html` is expected to be produced by the elided rendering step above.
  RENDER_CACHE.set(url, html);
  return html;
}
复制成功
1
2
3
4
5
6
7

添加缓存
index.html

<div id="container"></div>
<script>
  // Build an <li> per post and write the list (id="post") into `container`.
  // NOTE(review): post.title / post.content are inserted as raw HTML;
  // escape them if the data can come from untrusted users.
  function render(posts, container) {
    const html = posts.reduce((html, post) => {
      return `${html}
        <li>
        <h2>${post.title}</h2>
        <p>${post.content}</p>
        </li>
        `;
    }, "");
    container.innerHTML = `<ul id="post">${html}</ul>`;
  }
  const container = document.querySelector("#container");
  // If #post is already in the document, skip the client-side fetch and render.
  const PRE_RENDER = document.querySelector("#post");
  if (!PRE_RENDER) {
    fetch("/post")
      .then((res) => res.json())
      .then((res) => render(res, container));
  }
</script>
复制成功
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

继续优化-取消不必要的网络请求
ssr.js

/**
 * Sketch of ssr() that aborts requests whose resource type is not needed to
 * produce the markup; the rendering step itself is elided.
 */
async function ssr(url) {
  // ...
  // Resource types that are allowed through; everything else is aborted.
  const whitelist = ["document", "script", "xhr", "fetch"];
  await page.setRequestInterception(true);
  page.on("request", (req) => {
    if (!whitelist.includes(req.resourceType())) {
      return req.abort();
    }
    req.continue();
  });
}
复制成功
1
2
3
4
5
6
7
8
9
10
11
  • 思考-扩展骨架屏
  1. 测试代码覆盖率
const puppeteer = require('puppeteer');

// Collect JS and CSS coverage for one page load and print, per resource,
// the used byte count next to the total text length.
(async () => {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  const { coverage } = page;

  // Start collecting coverage for both JS and CSS files.
  await Promise.all([coverage.startJSCoverage(), coverage.startCSSCoverage()]);

  await page.goto('https://www.baidu.com');
  await page.waitForSelector('title');

  // Stop collecting and take the recorded entries.
  const [jsEntries, cssEntries] = await Promise.all([
    coverage.stopJSCoverage(),
    coverage.stopCSSCoverage(),
  ]);

  // Turn each coverage entry into a { url, type, usedBytes, totalBytes } record.
  const summarize = (type, entries) =>
    entries.map((entry) => ({
      url: entry.url,
      type,
      usedBytes: entry.ranges.reduce(
        (sum, { start, end }) => sum + (end - start - 1),
        0
      ),
      totalBytes: entry.text.length,
    }));

  console.info([...summarize('js', jsEntries), ...summarize('css', cssEntries)]);

  await browser.close();
})();

复制成功
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
  1. 代码注入
  2. 自动化测试扩展程序
  3. AB测试
  4. 人机验证

https://filipvitas.medium.com/how-to-bypass-slider-captcha-with-js-and-puppeteer-cd5e28105e3c

https://juejin.cn/post/6844903566289682440

https://juejin.cn/post/6844903860788527118

https://jsoverson.medium.com/bypassing-captchas-with-headless-chrome-93f294518337

// Directory of the unpacked extension to load.
const CRX = "/path/to/extension/";

// Both Chrome flags take the same extension path.
const extensionFlags = ["disable-extensions-except", "load-extension"].map(
  (flag) => `--${flag}=${CRX}`
);

// Launched with headless disabled and the extension flags passed to Chrome.
const browser = await puppeteer.launch({
  headless: false,
  args: extensionFlags,
});
复制成功
1
2
3
4
5
6

有哪些启动参数

  1. 爬虫
    https://github.com/artiely/koa2-vue/tree/master/server/crawler

可用于在线演示的网站

https://try-puppeteer.appspot.com/

总结

  • 网页截图或者生成 PDF
  • 爬取 SPA 或 SSR 网站
  • UI 自动化测试,模拟表单提交,键盘输入,点击等行为
  • 捕获网站的时间线,帮助诊断性能问题
  • 创建一个最新的自动化测试环境,使用最新的 js 和最新的 Chrome 浏览器运行测试用例
  • 测试 Chrome 扩展程序
    ...

参考示例

https://github.com/artiely/puppeteer-demo

https://github.com/puppeteer/examples

入门资料

https://www.qikegu.com/docs/4531

https://juejin.cn/post/6844903605485453320

其他

https://github.com/stereobooster/react-snap

https://github.com/g-plane/rize

https://github.com/richshaw2015/wxapp-appium