async function ssr(url) { const browser = await puppeteer.launch({headless: true}); const page = await browser.newPage(); await page.goto(url, {waitUntil: 'networkidle0'}); const html = await page.content(); // serialized HTML of page DOM. await browser.close(); return html; }
注意: 我会在文章中使用 ES 模块(import),这要求 Node 8.5.0+,并在运行时加上 --experimental-modules 标志。觉得麻烦的话可以自行使用 require() 语句。关于 Node 上的 ES 模块支持可以读读这篇文章。
## 导论
----------------------------
如果我对 SEO 理解没有偏差的话,你读到这篇文章可能因为下面两个原因之一。首先,你已经搭建了一个 web 应用,并且它没有被搜索引擎索引!你的应用可能是 SPA,PWA,使用了 vanilla JS,或者使用了其他更复杂的框架或类库。老实说,你使用何种技术并不重要。重要的是,你花费了大量时间搭建出优秀的 web 页面,然而用户却搜不到它。你读这篇文章的另一个理由可能是因为,网上一些文章说了服务端渲染可以提升性能。你希望快速减少 JavaScript 启动时间,提升首次有效绘制速度。
// In-memory cache of rendered pages. Note: this will be cleared whenever the // server process stops. If you need true persistence, use something like // Google Cloud Storage (https://firebase.google.com/docs/storage/web/start). const RENDER_CACHE = new Map();
async function ssr(url) { if (RENDER_CACHE.has(url)) { return {html: RENDER_CACHE.get(url), ttRenderMs: 0}; }
const start = Date.now();
const browser = await puppeteer.launch(); const page = await browser.newPage(); try { // networkidle0 waits for the network to be idle (no requests for 500ms). // The page's JS has likely produced markup by this point, but wait longer // if your site lazy loads, etc. await page.goto(url, {waitUntil: 'networkidle0'}); await page.waitForSelector('#posts'); // ensure #posts exists in the DOM. } catch (err) { console.error(err); throw new Error('page.goto/waitForSelector timed out.'); }
const html = await page.content(); // serialized HTML of page DOM. await browser.close();
一起来修复这个问题。我们要告知页面,它的 HTML 早就名花有主了。我找到的解决方案是,在页面加载时判断 <ul id="posts"> 是否已在 DOM 中,如果在,页面就已经在服务端渲染过了,这样就可以避免重新创建 DOM。
public/index.html
<html> <body> <div id="container"> <!-- Populated by JS (below) or by prerendering (server). Either way, #container gets populated with the posts markup: <ul id="posts">...</ul> --> </div> </body> <script> ... (async() => { const container = document.querySelector('#container');
// Posts markup is already in DOM if we're seeing a SSR'd. // Don't re-hydrate the posts here on the client. const PRE_RENDERED = container.querySelector('#posts'); if (!PRE_RENDERED) { const posts = await fetch('/posts').then(resp => resp.json()); renderPosts(posts, container); } })(); </script> </html>
import puppeteer from 'puppeteer'; import * as prerender from './ssr.mjs'; import urlModule from 'url'; const URL = urlModule.URL;
app.get('/cron/update_cache', async (req, res) => { if (!req.get('X-Appengine-Cron')) { return res.status(403).send('Sorry, cron handler can only be run as admin.'); }
const browser = await puppeteer.launch(); const homepage = new URL(`${req.protocol}://${req.get('host')}`);
// Re-render main page and a few pages back. prerender.clearCache(); await prerender.ssr(homepage.href, await browser.wsEndpoint()); await prerender.ssr(`${homepage}?year=2018`); await prerender.ssr(`${homepage}?year=2017`); await prerender.ssr(`${homepage}?year=2016`); await browser.close();
... function clearCache() { RENDER_CACHE.clear(); }
export {ssr, clearCache};
## 其他因素
------------------------------------
告诉页面:“你正在被无头浏览器渲染”
当页面正在服务器上的无头 Chrome 中渲染时,客户端逻辑很有必要知道这一信息。我的应用使用了钩子来“关闭”部分不参与渲染 post 节点的页面。举例来说,我禁用了懒加载 firebase-auth.js 这部分代码。根本不需要用户登录!
在 URL 上加一个 ?headless 参数,是一个给页面加钩子的简单方法:
ssr.mjs
import urlModule from 'url'; const URL = urlModule.URL;
async function ssr(url) { ... // Add ?headless to the URL so the page has a signal // it's being loaded by headless Chrome. const renderUrl = new URL(url); renderUrl.searchParams.set('headless', ''); await page.goto(renderUrl, {waitUntil: 'networkidle0'}); ...
return {html}; }
可以在页面内查询该参数:
public/index.html
<html> <body> <div id="container"> <!-- Populated by the JS below. --> </div> </body> <script> ...
(async() => { const params = new URL(location.href).searchParams;
const RENDERING_IN_HEADLESS = params.has('headless'); if (RENDERING_IN_HEADLESS) { // Being rendered by headless Chrome on the server. // e.g. shut off features, don't lazy load non-essential resources, etc. }