HTMLRewriter does not work for head meta tags

I have been trying to use the HTMLRewriter API to remove tags and append new elements to the page head. However, it’s not working. Can someone please help?

/**
 * Streams `response` through an HTMLRewriter that applies MetaRewriter to
 * every `meta` element and HeadRewriter to the `head` element.
 *
 * @param {Response} response - The response whose HTML should be rewritten.
 * @returns {Promise<Response>} Resolves to the transformed response.
 */
async function appendJavascript(response) {
  // Handlers are registered in the same order as before: meta first, then head.
  const withMetaHandler = new HTMLRewriter().on("meta", new MetaRewriter());
  const withHeadHandler = withMetaHandler.on("head", new HeadRewriter());
  return withHeadHandler.transform(response);
}

/**
 * HTMLRewriter element handler for `meta` tags: forces the `content`
 * attribute to "index,follow" on tags named "robots" or "googlebot".
 * All other elements are left untouched.
 */
class MetaRewriter {
    /**
     * @param {Element} element - The element currently being visited.
     */
    element(element) {
        const metaName = element.getAttribute('name');
        const isCrawlerDirective = metaName === "robots" || metaName === "googlebot";
        if (!isCrawlerDirective) {
            return;
        }
        element.setAttribute('content', 'index,follow');
    }
}

/**
 * HTMLRewriter element handler for `head`: appends robots and googlebot
 * meta tags (both "index,follow") as raw HTML at the end of the element.
 */
class HeadRewriter {
  /**
   * @param {Element} element - The `head` element being visited.
   */
  element(element) {
    element.append(
      `<meta name="robots" content="index,follow">
      <meta name="googlebot" content="index,follow">`,
      {
        // Insert as markup rather than as escaped text.
        html: true
      }
    );
  }
}

For clarification: I’m able to append elements to the head, but I’m unable to edit/set attributes of existing meta tags. I’m able to edit any other tags, except `<meta>`.

Are you saying, in your above example, that your HeadRewriter class is working as intended, but your MetaRewriter one isn’t? Or that neither of them is working?

Your code looks right to me if we assume appendJavascript() is actually getting called properly (and that the closing braces are present on HeadRewriter). But your MetaRewriter won’t act on the tags added by HeadRewriter; it will only work if the incoming HTML already has those tags.

Everything works great if I’m appending elements, but not for setting attributes.

For instance, if I add `.on("head > meta:nth-child(1)", new removeElement())`, it works and deletes the element. However, if I try to remove an element by first reading its “name” attribute, it doesn’t work. I’ve spent over 2 days looking into this and, since I’m not an expert, I don’t know what to do.

Something is definitely wrong. If I append a new tag to every element matched under `.on("head")`, that tag is appended to all head elements except the `<meta>` ones.

This exact code works for me (this is for Pages Functions, because that’s what I had sitting there that I could paste some code into to try):

/**
 * Pages Function entry point: fetches the static asset for the incoming
 * request and streams it through an HTMLRewriter that applies MetaHandler
 * to every `meta` element.
 *
 * @param {object} context - Pages Functions context; only `request` and
 *   `env` are read here.
 * @returns {Promise<Response>} The rewritten asset response.
 */
export async function onRequest(context) {
    // Only pull out the fields this handler actually uses.
    const { request, env } = context;

    const res = await env.ASSETS.fetch(request);
    return new HTMLRewriter()
        .on('meta', new MetaHandler())
        .transform(res);
}

/**
 * HTMLRewriter element handler that rewrites the `content` attribute of
 * a `robots` meta tag to "index,follow"; other elements are ignored.
 */
class MetaHandler {
    /**
     * @param {Element} element - The element currently being visited.
     */
    element(element) {
        const isRobotsMeta = element.getAttribute('name') === 'robots';
        if (isRobotsMeta) {
            element.setAttribute('content', 'index,follow');
        }
    }
}

Your HeadRewriter also works for me. Maybe look at your input and how you’re calling that appendJavascript() function?

Thanks so much for your help. I do have something very similar, but it definitely doesn’t work. My appendJavascript is being called by “fetchAndApply”. Here is a snippet.

For your reference, “HeadRewriter” does work, and both meta tags are appended to the head of the page.

// Register the Worker's fetch handler: every incoming request is answered
// with whatever fetchAndApply() produces for it.
addEventListener("fetch", function handleFetch(event) {
  const { request } = event;
  event.respondWith(fetchAndApply(request));
});

/**
 * Proxies the incoming request to the upstream host `vizinhos.super.site`.
 *
 * - Paths that start with "/app" and end with "js" are fetched as text, every
 *   occurrence of the upstream hostname is replaced with MY_DOMAIN, and the
 *   result is returned directly with script-specific headers.
 * - Everything else is forwarded with the original method, headers and body,
 *   stripped of its Content-Security-Policy headers, and handed to
 *   appendJavascript() for HTML rewriting.
 *
 * @param {Request} request - The incoming request.
 * @returns {Promise<Response>} The proxied (and possibly rewritten) response.
 */
async function fetchAndApply(request) {
  const url = new URL(request.url);
  url.hostname = 'vizinhos.super.site';

  const isAppScript = url.pathname.startsWith("/app") && url.pathname.endsWith("js");
  if (isAppScript) {
    const scriptUpstream = await fetch(url.toString());
    const body = await scriptUpstream.text();
    // Dots are escaped so only the literal hostname matches; a single global
    // replace already covers every occurrence.
    const scriptResponse = new Response(
      body.replace(/vizinhos\.super\.site/g, MY_DOMAIN),
      scriptUpstream
    );
    scriptResponse.headers.set("Content-Type", "application/x-javascript");
    scriptResponse.headers.set("X-Robots-Tag", "all");
    return scriptResponse;
  }

  const pageUpstream = await fetch(url.toString(), {
    body: request.body,
    headers: request.headers,
    method: request.method
  });

  // Copy into a fresh Response before editing headers.
  // NOTE(review): presumably needed because headers on a fetched response
  // are immutable in this runtime — confirm.
  const pageResponse = new Response(pageUpstream.body, pageUpstream);
  pageResponse.headers.delete("Content-Security-Policy");
  pageResponse.headers.delete("X-Content-Security-Policy");

  return appendJavascript(pageResponse, SLUG_TO_PAGE);
}

/**
 * HTMLRewriter element handler for `meta` tags. Overrides the `content`
 * attribute of selected tags:
 *   - property="og:url"     -> "https://www.vizinhos.org/"
 *   - property="og:locale"  -> "pt_PT"
 *   - name="robots" or name="googlebot" -> "index,follow"
 * Any other element is left unchanged.
 */
class MetaRewriter {
    /**
     * @param {Element} element - The element currently being visited.
     */
    element(element) {
        const property = element.getAttribute('property');
        if (property === 'og:url') {
            element.setAttribute('content', 'https://www.vizinhos.org/');
        } else if (property === 'og:locale') {
            element.setAttribute('content', 'pt_PT');
        }

        // Applied after the Open Graph overrides, so a tag carrying both a
        // matching property and a matching name ends up with "index,follow".
        const name = element.getAttribute('name');
        if (name === "robots" || name === "googlebot") {
            element.setAttribute('content', 'index,follow');
        }
    }
}

/**
 * HTMLRewriter element handler for `head`: appends robots and googlebot
 * meta tags (both "index,follow") as raw HTML at the end of the element.
 */
class HeadRewriter {
    /**
     * @param {Element} element - The `head` element being visited.
     */
    element(element) {
        const crawlerMetaTags = `<meta name="robots" content="index,follow">
           <meta name="googlebot" content="index,follow">`;
        // html: true inserts the string as markup rather than as escaped text.
        const appendOptions = { html: true };
        element.append(crawlerMetaTags, appendOptions);
    }
}

class BodyRewriter { (…) }

/**
 * Streams `res` through an HTMLRewriter with three element handlers:
 * MetaRewriter on `meta`, HeadRewriter on `head` and BodyRewriter on `body`.
 *
 * @param {Response} res - The response whose HTML should be rewritten.
 * @param {*} SLUG_TO_PAGE - Accepted for callers; not read by this function.
 * @returns {Promise<Response>} Resolves to the transformed response.
 */
export async function appendJavascript(res, SLUG_TO_PAGE) {
  // Registration order is preserved: meta, then head, then body.
  const withMeta = new HTMLRewriter().on("meta", new MetaRewriter());
  const withHead = withMeta.on("head", new HeadRewriter());
  const withBody = withHead.on("body", new BodyRewriter());
  return withBody.transform(res);
}

And you’re not expecting MetaRewriter to rewrite the tags added by HeadRewriter, right? In your example they are the same so it doesn’t look like you’re expecting that (but if the page has existing tags it will end up with two).

Sorry, I ended up posting an intermediate solution. Because setAttribute does not work, I tried to `.remove()` the meta tag and then append it again with the desired content. But that doesn’t work either. I can’t edit or remove tags.

I have the same problem. It seems that title works, but the others don’t:

[snip]
    this.title = "some title"
    this.description = "some description"
    /**
     * Element handler: for a `title` element, replaces its inner content with
     * this.title; for a `meta` element, dispatches on its `property`
     * attribute (og:title / og:description) and calls setInnerContent with
     * this.title or this.description. A message is logged for each branch.
     *
     * NOTE(review): as reported in this thread, the meta branches log but the
     * meta tags are not changed in the output; the follow-up states that
     * setAttribute('content', ...) is what works for meta tags, not
     * setInnerContent.
     *
     * @param {Element} element - The element currently being visited.
     */
    element(element) {
        // An incoming element, such as `div`
        if (element.tagName == "title") {
            console.log('Replacing title')
            element.setInnerContent(this.title)
        }
        if (element.tagName == "meta") {
            // The Open Graph property name selects which value to apply.
            const type = element.getAttribute("property")
            switch (type) {
                case "og:title":
                    console.log('Replacing og:title')
                    element.setInnerContent(this.title)
                    break
                case "og:description":
                    console.log('Replacing og:description')
                    element.setInnerContent(this.description)
                    break
            }
        }
    }
[snip]

In logs:

    {
      "message": [
        "Replacing og:title"
      ],
      "level": "log",
      "timestamp": 1668809500170
    },
    {
      "message": [
        "Replacing og:description"
      ],
      "level": "log",
      "timestamp": 1668809500170
    },
    {
      "message": [
        "Replacing title"
      ],
      "level": "log",
      "timestamp": 1668809500170
    }

However, in the output, only the title tag is replaced, not the meta tags.

Update: I just got the meta tags wrong. You don’t use setInnerContent; you use setAttribute('content', …). It works now.

1 Like