用正则表达式处理 HTML 的实践

2018年05月10日

/**
 * Removes the `style` attribute from every opening tag in an HTML string.
 *
 * Only text inside an opening tag is touched; a literal `style="..."` that
 * appears in text content is left alone. The attribute name is matched
 * case-insensitively and as a whole name, so `data-style` is preserved.
 * Double-quoted, single-quoted and unquoted values are all handled, and the
 * whitespace that preceded the attribute is removed together with it.
 *
 * This is a regex-based approximation, not an HTML parser: comments, CDATA
 * and `<script>`/`<style>` contents are not treated specially.
 *
 * @param {string} html - HTML markup to clean.
 * @returns {string} The markup with `style` attributes removed.
 */
function clearStyle(html) {
  // An opening tag. Quoted values are consumed as a unit so that a `>`
  // inside an attribute value does not end the tag early.
  const tagPattern = /<[a-zA-Z](?:[^<>"']|"[^"]*"|'[^']*')*>/g;
  // One attribute with its leading whitespace: a name followed by an
  // optional double-quoted, single-quoted or unquoted value.
  const attrPattern = /\s+([^\s"'<>\/=]+)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'<>]+))?/g;

  return html.replace(tagPattern, (tag) =>
    tag.replace(attrPattern, (attr, name) => (name.toLowerCase() === 'style' ? '' : attr))
  );
}
 1// 清除标签中除了src/href外所有的属性
 2function clearAttr(html) {
 3  return html.replace(/<([a-zA-Z]+?)\s(.*?)>/g, (match, tag, other) => {
 4    let attr = '';
 5    other.replace(/(src|href)=".+?"/, match => {
 6      attr = ' ' + match;
 7    });
 8    return `<${tag}${attr}>`;
 9  });
10}
/**
 * Appends a newline after every closing `</p>` or `</div>` tag.
 *
 * Tag names are matched case-insensitively and optional whitespace before
 * the closing `>` is accepted. The matched tag text itself is kept as
 * written. A newline is added even when one already follows the tag.
 *
 * @param {string} html - HTML markup to process.
 * @returns {string} The markup with a `\n` inserted after each closing tag.
 */
function addLine(html) {
  // `$&` re-inserts the matched closing tag unchanged.
  return html.replace(/<\/(?:p|div)\s*>/gi, '$&\n');
}