W               [ \t\r\n\f]
nonascii        [\200-\377]
ascii           [ -~]
alphanum        [a-z0-9]
punct           [][!\"#$%&\'()*+,-./:;<=>?@\\^_`{|}~]
safepunct       [][!#$%&()*+,-./:;=?@\\^_`{|}~]
tag             {alphanum}+
key             ({alphanum}|-)+
val             ({alphanum}|{nonascii}|{safepunct})+

%x DOCTYPE
%x COMMENT COMMENT_BAD
%x TAG_START TAG TAG_ATTR_KEY TAG_ATTR_VAL
%x DQUOTED SQUOTED
%x SCRIPT_START SCRIPT
%x STYLE_START STYLE

%{
/***************************************
  $Header: /home/amb/CVS/wwwoffle/src/htmlmodify.l,v 1.97 2009-03-13 19:29:50 amb Exp $

  WWWOFFLE - World Wide Web Offline Explorer - Version 2.9f.
  Parse the HTML and modify the source.
  ******************/ /******************
  Written by Andrew M. Bishop

  This file Copyright 1997-2009 Andrew M. Bishop
  It may be distributed under the GNU Public License, version 2, or
  any higher version.  See section COPYING of the GNU Public license
  for conditions under which this file may be redistributed.
  ***************************************/


#include "autoconfig.h"

/* NOTE(review): the directives below carry no header name in this copy of the
   file; the names were presumably lost when the text was extracted and must be
   restored from the upstream source before this will compile. */

#include
#include
#include
#include
#include
#include

#if TIME_WITH_SYS_TIME
# include
# include
#else
# if HAVE_SYS_TIME_H
# include
# else
# include
# endif
#endif

#include "wwwoffle.h"
#include "io.h"
#include "misc.h"
#include "proto.h"
#include "config.h"
#include "document.h"


/* Parser outputs */
/* Integer codes in three groups: text-level items (1-3), tag delimiters
   (11-13) and attribute parts (21-24).  Presumably these are the values
   returned by htmlmodify_yylex() - confirm against the rules section. */

#define LEX_PLAINTEXT 1
#define LEX_COMMENT 2
#define LEX_DOCTYPE 3
#define LEX_TAG_BEGIN 11
#define LEX_TAG_END 12
#define LEX_TAG_END_XHTML 13
#define LEX_ATTR_KEY 21
#define LEX_ATTR_VAL 22
#define LEX_ATTR_VAL_SQ 23
#define LEX_ATTR_VAL_DQ 24


/*+ Tag types +*/
/* Each value is the index of the matching string in tags[] below; a leading
   double underscore in the name stands for the '/' of a closing tag.
   tag_complex is a marker, not a real tag: the entries that follow it are the
   ones described as "stored and processed as a whole". */
typedef enum _HTMLTags
{
 tag__a = 0 /* "/a" */ ,
 tag_applet = 1 /* "applet" */ ,
 tag__applet = 2 /* "/applet" */ ,
 tag_base = 3 /* "base" */ ,
 tag_blink = 4 /* "blink" */ ,
 tag__blink = 5 /* "/blink" */ ,
 tag__body = 6 /* "/body" */ ,
 tag__embed = 7 /* "/embed" */ ,
 tag__html = 8 /* "/html" */ ,
 tag__iframe = 9 /* "/iframe" */ ,
 tag_marquee =10 /* "marquee" */ ,
 tag__marquee =11 /* "/marquee" */ ,
 tag_noscript =12 /* "noscript" */ ,
 tag__noscript =13 /* "/noscript" */ ,
 tag_param =14 /* "param" */ ,
 tag__object =15 /* "/object" */ ,
 tag_script =16 /* "script" */ ,
 tag__script =17 /* "/script" */ ,
 tag__style =18 /* "/style" */ ,
 tag_complex =19 /* Complex tags, stored and processed as a whole. */,
 tag_a =20 /* "a" */ ,
 tag_body =21 /* "body" */ ,
 tag_embed =22 /* "embed" */ ,
 tag_iframe =23 /* "iframe" */ ,
 tag_img =24 /* "img" */ ,
 tag_link =25 /* "link" */ ,
 tag_meta =26 /* "meta" */ ,
 tag_object =27 /* "object" */ ,
 tag_style =28 /* "style" */ ,
 tag_td =29 /* "td" */ ,
 tag_ntags =30 /* The number of entries, not a tag. */
}
HTMLTags;


/*+ Tag strings +*/
/* Indexed by HTMLTags; the order here must be kept in step with the enum. */
static const char* const tags[]=
{
 /* tag__a = 0 */ "/a" ,
 /* tag_applet = 1 */ "applet" ,
 /* tag__applet = 2 */ "/applet" ,
 /* tag_base = 3 */ "base" ,
 /* tag_blink = 4 */ "blink" ,
 /* tag__blink = 5 */ "/blink" ,
 /* tag__body = 6 */ "/body" ,
 /* tag__embed = 7 */ "/embed" ,
 /* tag__html = 8 */ "/html" ,
 /* tag__iframe = 9 */ "/iframe" ,
 /* tag_marquee =10 */ "marquee" ,
 /* tag__marquee =11 */ "/marquee" ,
 /* tag_noscript =12 */ "noscript" ,
 /* tag__noscript =13 */ "/noscript" ,
 /* tag_param =14 */ "param" ,
 /* tag__object =15 */ "/object" ,
 /* tag_script =16 */ "script" ,
 /* tag__script =17 */ "/script" ,
 /* tag__style =18 */ "/style" ,
 /* tag_complex =19 */ "" ,
 /* tag_a =20 */ "a" ,
 /* tag_body =21 */ "body" ,
 /* tag_embed =22 */ "embed" ,
 /* tag_iframe =23 */ "iframe" ,
 /* tag_img =24 */ "img" ,
 /* tag_link =25 */ "link" ,
 /* tag_meta =26 */ "meta" ,
 /* tag_object =27 */ "object" ,
 /* tag_style =28 */ "style" ,
 /* tag_td =29 */ "td"
};


/*+ Attribute types +*/
/* Each value is the index of the matching string in attributes[] below.
   att_natts is the number of entries; this file also stores it as the type of
   an attribute that has no entry in the table (the "alt" attribute that is
   added to replaced images). */
typedef enum _HTMLAttributes
{
 att_background = 0 /* "background" */ ,
 att_classid = 1 /* "classid" */ ,
 att_codetype = 2 /* "codetype" */ ,
 att_content = 3 /* "content" */ ,
 att_data = 4 /* "data" */ ,
 att_height = 5 /* "height" */ ,
 att_href = 6 /* "href" */ ,
 att_http_equiv = 7 /* "http-equiv" */ ,
 att_onblur = 8 /* "onblur" */ ,
 att_onchange = 9 /* "onchange" */ ,
 att_onclick =10 /* "onclick" */ ,
 att_ondblclick =11 /* "ondblclick" */ ,
 att_onerror =12 /* "onerror" */ ,
 att_onfocus =13 /* "onfocus" */ ,
 att_onkeydown =14 /* "onkeydown" */ ,
 att_onkeypress =15 /* "onkeypress" */ ,
 att_onload =16 /* "onload" */ ,
 att_onmousedown =17 /* "onmousedown" */ ,
 att_onmousemove =18 /* "onmousemove" */ ,
 att_onmouseout =19 /* "onmouseout" */ ,
 att_onmouseover =20 /* "onmouseover" */ ,
 att_onmouseup =21 /* "onmouseup" */ ,
 att_onreset =22 /* "onreset" */ ,
 att_onselect =23 /* "onselect" */ ,
 att_onsubmit =24 /* "onsubmit" */ ,
 att_onunload =25 /* "onunload" */ ,
 att_rel =26 /* "rel" */ ,
 att_src =27 /* "src" */ ,
 att_style =28 /* "style" */ ,
 att_type =29 /* "type" */ ,
 att_width =30 /* "width" */ ,
 att_natts =31 /* The number of entries, not an attribute. */
}
HTMLAttributes;


/*+ Attribute strings. +*/
/* Indexed by HTMLAttributes; the order here must be kept in step with the enum. */
static const char* const attributes[]=
{
 /* att_background = 0 */ "background" ,
 /* att_classid = 1 */ "classid" ,
 /* att_codetype = 2 */ "codetype" ,
 /* att_content = 3 */ "content" ,
 /* att_data = 4 */ "data" ,
 /* att_height = 5 */ "height" ,
 /* att_href = 6 */ "href" ,
 /* att_http_equiv = 7 */ "http-equiv" ,
 /* att_onblur = 8 */ "onblur" ,
 /* att_onchange = 9 */ "onchange" ,
 /* att_onclick =10 */ "onclick" ,
 /* att_ondblclick =11 */ "ondblclick" ,
 /* att_onerror =12 */ "onerror" ,
 /* att_onfocus =13 */ "onfocus" ,
 /* att_onkeydown =14 */ "onkeydown" ,
 /* att_onkeypress =15 */ "onkeypress" ,
 /* att_onload =16 */ "onload" ,
 /* att_onmousedown =17 */ "onmousedown" ,
 /* att_onmousemove =18 */ "onmousemove" ,
 /* att_onmouseout =19 */ "onmouseout" ,
 /* att_onmouseover =20 */ "onmouseover" ,
 /* att_onmouseup =21 */ "onmouseup" ,
 /* att_onreset =22 */ "onreset" ,
 /* att_onselect =23 */ "onselect" ,
 /* att_onsubmit =24 */ "onsubmit" ,
 /* att_onunload =25 */ "onunload" ,
 /* att_rel =26 */ "rel" ,
 /* att_src =27 */ "src" ,
 /* att_style =28 */ "style" ,
 /* att_type =29 */ "type" ,
 /* att_width =30 */ "width"
};


/*+ A structure to hold a tag and its attributes. +*/
/* The four attr_* arrays are parallel: entry i of each describes attribute i.
   Entries 0..nattr-1 are in use and nattr_malloc is the allocated length of
   every array.  The code in this file tests attr_val[i] for NULL before using
   it, so an attribute may have no value. */
typedef struct _Tag
{
 HTMLTags type; /*+ The type of the tag. +*/
 char *tag; /*+ The Tag itself. +*/

 int xhtml; /*+ A flag that is set for an XHTML closing tag '< ... />' +*/

 int nattr; /*+ The number of attributes. +*/
 int nattr_malloc; /*+ The number of attributes that space is malloced for. +*/
 int *attr_type; /*+ The list of attribute types. +*/
 char **attr_key; /*+ The list of attribute keys. +*/
 char **attr_val; /*+ The list of attribute values. +*/
 char **attr_quote; /*+ The list of attribute quotes. +*/
}
Tag;


/* Microsoft Character mapping */

/*+ The option to convert the characters when seen. +*/
static int demoronise_ms_chars;

/*+ The option to fix mixed Cyrillic text; set per document by OutputHTMLWithModifications(). +*/
static int fix_mixed_cyrillic;


/* Definitions of why the output is disabled. */
/* Every reason is a distinct bit, so presumably several can be combined in
   disable_output at the same time - confirm against the rules section. */

#define DISABLE_NONE 0
#define DISABLE_META 1
#define DISABLE_LINK 2
#define DISABLE_OBJECT 4
#define DISABLE_A 8
#define DISABLE_IFRAME 16
#define DISABLE_IMG 32
#define DISABLE_STYLE 64
#define DISABLE_PARSE 256


/* Local functions */

static void modify_html(URL *Url);

static /*@null@*/ char *htmlmodify_yylval=NULL;
extern int htmlmodify_yylex(void);

static /*@null@*/ char *extract_charset(const char *content_type);

static int handle_a_tag(const Tag *tag,int disable_dontget_anchors,int disable_script);
static int handle_iframe_tag(const Tag *tag,int disable_dontget_iframes,int disable_script);
static void handle_img_tag(Tag *tag,int replace_dontget,const char *dontget_replacement,
 int replace_webbug,const char *webbug_replacement,
 int disable_script);
static int handle_object_tag(Tag *tag,int replace_dontget,const char *dontget_replacement,
 int replace_webbug,const char *webbug_replacement,
 int disable_applet,
 int disable_flash,
 int disable_dontget_iframes,
 int disable_script);
static void output_img_or_object_tag(Tag *tag,int src_att,
 int replace_dontget,const char *dontget_replacement,
 int replace_webbug,const char *webbug_replacement,
 int disable_script);
static void handle_link_tag(const Tag *tag,int disable_style,int disable_script);
static int handle_style_script_tag(const Tag *tag,int disable_script);
static void handle_meta_tag(const Tag
*tag,int disable_meta_refresh,int disable_meta_refresh_self,int disable_meta_set_cookie); static void output_tag(const Tag *tag,const char *prefix,const char *suffix); static void handle_high_bit(unsigned char ch); /*+ The add-cache-info optional footer. +*/ static /*@null@*/ /*@observer@*/ char *cache_info=NULL; /*+ The base URL of this page. +*/ static /*@null@*/ URL *baseUrl=NULL; /*+ Set this to disable the output. +*/ static int disable_output=DISABLE_NONE; /*++++++++++++++++++++++++++++++++++++++ Output the file with the modificatons if it is HTML, else just output. URL *Url The URL that we are parsing. int spool The file descriptor for the spool file to get the date from. char *content_type The HTTP header containing the content type (and perhaps the charset). ++++++++++++++++++++++++++++++++++++++*/ void OutputHTMLWithModifications(URL *Url,int spool,char *content_type) { static int first=1; if(ConfigBooleanURL(AddCacheInfo,Url)) { struct stat buf; time_t t_ago; char *date,*timeunit,timeago[MAX_INT_STR+1]; fstat(spool,&buf); t_ago=time(NULL)-buf.st_mtime; date=RFC822Date(buf.st_mtime,0); if(t_ago<0) {strcpy(timeago,"?");timeunit="";} else if(t_ago<3600) {sprintf(timeago,"%ld",(long)t_ago/60);timeunit="m";} else if(t_ago<(24*3600)) {sprintf(timeago,"%ld",(long)t_ago/3600);timeunit="h";} else if(t_ago<(14*24*3600)) {sprintf(timeago,"%ld",(long)t_ago/(24*3600));timeunit="d";} else if(t_ago<(30*24*3600)) {sprintf(timeago,"%ld",(long)t_ago/(7*24*3600));timeunit="w";} else {sprintf(timeago,"%ld",(long)t_ago/(30*24*3600));timeunit="M";} cache_info=HTMLMessageString("AddCacheInfo", "url",Url->name, "date",date, "time",timeago, "unit",timeunit, NULL); } demoronise_ms_chars=0; if(ConfigBooleanURL(DemoroniseMSChars,Url)) { char* charset=extract_charset(content_type); if(charset) { if(strcasecmp(charset,"utf-8") && strcasecmp(charset,"koi8-r") && strcasecmp(charset,"euc-kr") && strcasecmp(charset,"big5") && strcasecmp(charset,"iso-2022-jp") && 
strcasecmp(charset,"chinesebig5")) demoronise_ms_chars=1; free(charset); } else demoronise_ms_chars=1; } fix_mixed_cyrillic=0; if(ConfigBooleanURL(FixMixedCyrillic,Url)) { char* charset=extract_charset(content_type); if(charset) { if(!strcasecmp(charset,"koi8-r")) fix_mixed_cyrillic=1; free(charset); } else fix_mixed_cyrillic=1; } baseUrl=Url; if(!first) htmlmodify_yyrestart(NULL); modify_html(Url); cache_info=NULL; first=0; } /*+ A macro to output the data if valid to do so. +*/ #define YY_OUTPUT(text) \ if(!disable_output && *text) \ wwwoffles_write_data(text,strlen(text)) /*++++++++++++++++++++++++++++++++++++++ Extract the charset from a MIME type and charset. char *extract_charset Returns the charset that it found or NULL if none. const char *content_type The HTTP content type. ++++++++++++++++++++++++++++++++++++++*/ static char *extract_charset(const char *content_type) { const char *p; char *charset=NULL; /* ' *text/html *; *charset *= *["']?...["']?' */ p=content_type; while(*p && *p!=';') p++; if(*p!=';') return(NULL); /* unparseable */ p++; while(isspace(*p)) p++; if(!*p) return(NULL); /* unparseable */ if(!strncasecmp(p,"charset",(size_t)7)) { char *q; p+=7; while(*p && *p!='=') p++; if(*p!='=') return(NULL); /* unparseable */ p++; while(isspace(*p)) p++; if(!*p) return(NULL); /* unparseable */ if(*p=='"' || *p=='\'') p++; charset=(char*)malloc(strlen(p)+1); strcpy(charset,p); q=charset+strlen(p)-1; if(*q=='"' || *q=='\'') *q=0; } return(charset); } /*++++++++++++++++++++++++++++++++++++++ Take the information for the anchor tag and parse it. int handle_a_tag Returns 1 if the anchor was disabled. const Tag *tag The tag information. int disable_dontget_anchors The option to disable links to URLs that are not got. int disable_script Set to true if scripts are disabled. 
++++++++++++++++++++++++++++++++++++++*/ static int handle_a_tag(const Tag *tag,int disable_dontget_anchors,int disable_script) { int i; int is_dontget=0,is_script=0; for(i=0;inattr;i++) if(tag->attr_type[i]==att_href && tag->attr_val[i]) { if(disable_dontget_anchors) { URL *linkUrl=LinkURL(baseUrl,tag->attr_val[i]); is_dontget=ConfigBooleanMatchURL(DontGet,linkUrl); FreeURL(linkUrl); } if(disable_script) { if(!strncasecmp("javascript:",tag->attr_val[i],(size_t)11)) is_script=1; } } /* Output the original or modified tag. */ if(disable_dontget_anchors && is_dontget) output_tag(tag,"!-- WWWOFFLE (disable-dontget-links) - "," --"); else if(disable_script && is_script) output_tag(tag,"!-- WWWOFFLE (disable-script) - "," --"); else output_tag(tag,NULL,NULL); return((disable_dontget_anchors && is_dontget) || (disable_script && is_script)); } /*++++++++++++++++++++++++++++++++++++++ Take the information for the iframe tag and parse it. int handle_iframe_tag Returns 1 if the iframe was disabled. const Tag *tag The tag information. int disable_dontget_iframes The option to disable iframes to URLs that are not got. int disable_script Set to true if scripts are disabled. ++++++++++++++++++++++++++++++++++++++*/ static int handle_iframe_tag(const Tag *tag,int disable_dontget_iframes,int disable_script) { int i; int is_dontget=0,is_script=0; for(i=0;inattr;i++) if(tag->attr_type[i]==att_src && tag->attr_val[i]) { if(disable_dontget_iframes) { URL *linkUrl=LinkURL(baseUrl,tag->attr_val[i]); is_dontget=ConfigBooleanMatchURL(DontGet,linkUrl); FreeURL(linkUrl); } if(disable_script) { if(!strncasecmp("javascript:",tag->attr_val[i],(size_t)11)) is_script=1; } } /* Output the original or modified tag. 
*/ if(disable_dontget_iframes && is_dontget) output_tag(tag,"!-- WWWOFFLE (disable-dontget-iframes) - "," --"); else if(disable_script && is_script) output_tag(tag,"!-- WWWOFFLE (disable-script) - "," --"); else output_tag(tag,NULL,NULL); return((disable_dontget_iframes && is_dontget) || (disable_script && is_script)); } /*++++++++++++++++++++++++++++++++++++++ Take the information for the img tag and parse it. Tag *tag The tag information. int replace_dontget The option to replace the images in the DontGet section. const char *dontget_replacement The DontGet replacement image. int replace_webbug The option to replace the 1x1 pixel webbug images. const char *webbug_replacement The webbug replacement image. int disable_script Set to true if scripts are disabled. ++++++++++++++++++++++++++++++++++++++*/ static void handle_img_tag(Tag *tag,int replace_dontget,const char *dontget_replacement, int replace_webbug,const char *webbug_replacement, int disable_script) { int i; int src_att=-1; for(i=0;inattr;i++) if(tag->attr_type[i]==att_src && tag->attr_val[i]) src_att=i; output_img_or_object_tag(tag,src_att,replace_dontget,dontget_replacement,replace_webbug,webbug_replacement,disable_script); } /*++++++++++++++++++++++++++++++++++++++ Output an image tag or an object tag that contains an image. Tag *tag The tag. int src_att The attribute number the contains the URI. int replace_dontget The option to replace the images in the DontGet section. const char *dontget_replacement The DontGet replacement image. int replace_webbug The option to replace the 1x1 pixel webbug images. const char *webbug_replacement The webbug replacement image. int disable_script Set to true if scripts are disabled. 
++++++++++++++++++++++++++++++++++++++*/

static void output_img_or_object_tag(Tag *tag,int src_att,
                                     int replace_dontget,const char *dontget_replacement,
                                     int replace_webbug,const char *webbug_replacement,
                                     int disable_script)
{
 int i;
 int is_dontget=0,is_webbug=0,is_script=0;
 const char *replacement=NULL;  /* the URI to put in place of the original, if any */

 /* Classify the URI; a negative src_att means that the tag has none. */

 if(src_att>=0)
   {
    if(replace_dontget)
      {
       URL *linkUrl=LinkURL(baseUrl,tag->attr_val[src_att]);

       is_dontget=ConfigBooleanMatchURL(DontGet,linkUrl);
       FreeURL(linkUrl);
      }

    if(replace_webbug)
      {
       /* A missing dimension counts as large, so both the width and the
          height must be given and be at most 1 for a webbug. */

       int width=1000,height=1000;

       for(i=0;i<tag->nattr;i++)
         {
          if(!tag->attr_val[i])
             continue;

          if(tag->attr_type[i]==att_width)
             width=atoi(tag->attr_val[i]);
          else if(tag->attr_type[i]==att_height)
             height=atoi(tag->attr_val[i]);
         }

       if(width<=1 && height<=1)
          is_webbug=1;
      }

    if(disable_script && !strncasecmp("javascript:",tag->attr_val[src_att],(size_t)11))
       is_script=1;
   }

 /* Output the original tag as a comment and choose the replacement (if required). */

 if(is_dontget)
   {
    output_tag(tag,"!-- WWWOFFLE (replace-dontget-images) - "," --");
    replacement=dontget_replacement;
   }
 else if(is_webbug)
   {
    output_tag(tag,"!-- WWWOFFLE (replace-webbug-images) - "," --");
    replacement=webbug_replacement;
   }
 else if(is_script)
    output_tag(tag,"!-- WWWOFFLE (replace-scripts) - "," --");

 /* Modify the src attribute (if required). */

 if(replacement)
   {
    char *resized=(char*)realloc((void*)tag->attr_val[src_att],strlen(replacement)+1);

    if(resized)
      {
       strcpy(resized,replacement);
       tag->attr_val[src_att]=resized;
      }
    else
       tag->attr_val[src_att][0]=0; /* no memory: blank the URI so that the original is not output */
   }

 /* Blank the alt & title attributes or add an empty alt (if required). */

 if(!is_script && (is_dontget || is_webbug))
   {
    int seen_alt=0;

    /* A title attribute counts as well as an alt attribute, so no alt is added
       to a tag that has only a title. */

    for(i=0;i<tag->nattr;i++)
       if(tag->attr_key[i] &&
          (!strcasecmp(tag->attr_key[i],"alt") || !strcasecmp(tag->attr_key[i],"title")))
         {
          free(tag->attr_val[i]);
          tag->attr_val  [i]=(char*)calloc((size_t)1,(size_t)1); /* the empty string, or no value if there is no memory */
          tag->attr_quote[i]="\"";
          seen_alt=1;
         }

    if(!seen_alt)
      {
       int have_room=1;

       if(tag->nattr==tag->nattr_malloc)
         {
          /* Make room for one more attribute.  Each array pointer is updated
             as soon as its own realloc succeeds (the old block is then gone),
             but the recorded capacity only increases if all four succeeded. */

          size_t n=(size_t)tag->nattr_malloc+1;
          int   *types =(int*)  realloc((void*)tag->attr_type ,n*sizeof(int));
          char **keys  =(char**)realloc((void*)tag->attr_key  ,n*sizeof(char*));
          char **vals  =(char**)realloc((void*)tag->attr_val  ,n*sizeof(char*));
          char **quotes=(char**)realloc((void*)tag->attr_quote,n*sizeof(char*));

          if(types)  tag->attr_type =types;
          if(keys)   tag->attr_key  =keys;
          if(vals)   tag->attr_val  =vals;
          if(quotes) tag->attr_quote=quotes;

          if(types && keys && vals && quotes)
            {
             tag->attr_key[tag->nattr_malloc]=NULL;
             tag->attr_val[tag->nattr_malloc]=NULL;
             tag->nattr_malloc+=1;
            }
          else
             have_room=0;
         }

       if(have_room)
         {
          char *key=(char*)malloc((size_t)4);
          char *val=(char*)calloc((size_t)1,(size_t)1);

          if(key && val)
            {
             strcpy(key,"alt");

             tag->attr_type [tag->nattr]=att_natts; /* "alt" has no entry in the attribute table */
             tag->attr_key  [tag->nattr]=key;
             tag->attr_val  [tag->nattr]=val;
             tag->attr_quote[tag->nattr]="\"";
             tag->nattr++;
            }
          else
            {
             /* No memory: the tag is output without the extra attribute. */

             free(key);
             free(val);
            }
         }
      }
   }

 /* Output the original or modified tag. */

 if(!is_script)
    output_tag(tag,NULL,NULL);
}


/*++++++++++++++++++++++++++++++++++++++
  Take the information for the object tag and parse it.

  int handle_object_tag Returns 1 if the object is Java and disabled, 2 if Flash and disabled,
  3 if equivalent to an iframe and disabled.

  Tag *tag The tag information.

  int replace_dontget The option to replace the images in the DontGet section.

  const char *dontget_replacement The DontGet replacement image.

  int replace_webbug The option to replace the 1x1 pixel webbug images.

  const char *webbug_replacement The webbug replacement image.

  int disable_applet The option to disable Java applets.

  int disable_flash The option to disable Flash animations.

  int disable_dontget_iframes The option to disable inline frames that are on the dontget list.

  int disable_script Set to true if scripts are disabled.
++++++++++++++++++++++++++++++++++++++*/ static int handle_object_tag(Tag *tag,int replace_dontget,const char *dontget_replacement, int replace_webbug,const char *webbug_replacement, int disable_applet, int disable_flash, int disable_dontget_iframes, int disable_script) { int i; int is_image=0,is_java=0,is_flash=0,is_inline=0,is_dontget=0,is_script=0; int data_att=-1; /* Check for images. */ if(replace_dontget || replace_webbug) { for(i=0;inattr;i++) if((tag->attr_type[i]==att_codetype && tag->attr_val[i] && !strncasecmp(tag->attr_val[i],"image",(size_t)5)) || (tag->attr_type[i]==att_type && tag->attr_val[i] && !strncasecmp(tag->attr_val[i],"image",(size_t)5))) is_image=1; else if(tag->attr_type[i]==att_data && tag->attr_val[i]) data_att=i; } /* Check for Java */ if(disable_applet) { for(i=0;inattr;i++) if(tag->attr_type[i]==att_codetype && tag->attr_val[i] && !strncasecmp(tag->attr_val[i],"application/java",(size_t)16)) is_java=1; else if(tag->attr_type[i]==att_classid && tag->attr_val[i] && !strncasecmp(tag->attr_val[i],"java:",(size_t)5)) is_java=1; } /* Check for Flash */ if(disable_flash) { for(i=0;inattr;i++) if((tag->attr_type[i]==att_codetype || tag->attr_type[i]==att_type) && tag->attr_val[i] && !strncasecmp(tag->attr_val[i],"application/x-shockwave-flash",(size_t)29)) is_flash=1; else if(tag->attr_type[i]==att_classid && tag->attr_val[i] && !strncasecmp(tag->attr_val[i],"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",(size_t)42)) is_flash=1; else if(tag->attr_type[i]==att_src && tag->attr_val[i] && !strncasecmp(tag->attr_val[i]+strlen(tag->attr_val[i])-4,".swf",(size_t)4)) is_flash=1; } /* Check for inline HTML (text) object */ if(disable_dontget_iframes) { for(i=0;inattr;i++) if((tag->attr_type[i]==att_codetype && tag->attr_val[i] && !strncasecmp(tag->attr_val[i],"text",(size_t)4)) || (tag->attr_type[i]==att_type && tag->attr_val[i] && !strncasecmp(tag->attr_val[i],"text",(size_t)4))) is_inline=1; else if(tag->attr_type[i]==att_data && tag->attr_val[i]) 
data_att=i; if(is_inline && data_att>=0) { if(disable_dontget_iframes) { URL *linkUrl=LinkURL(baseUrl,tag->attr_val[data_att]); is_dontget=ConfigBooleanMatchURL(DontGet,linkUrl); FreeURL(linkUrl); } if(disable_script) { if(!strncasecmp("javascript:",tag->attr_val[data_att],(size_t)11)) is_script=1; } } } /* Output the original or modified tag. */ if(is_image && (replace_dontget || replace_webbug || disable_script)) output_img_or_object_tag(tag,data_att,replace_dontget,dontget_replacement,replace_webbug,webbug_replacement,disable_script); else if(disable_applet && is_java) output_tag(tag,"!-- WWWOFFLE (disable-applet) - "," --"); else if(disable_flash && is_flash) output_tag(tag,"!-- WWWOFFLE (disable-flash) - "," --"); else if(is_inline && disable_dontget_iframes && is_dontget) output_tag(tag,"!-- WWWOFFLE (disable-dontget-iframes) - "," --"); else if(is_inline && disable_script && is_script) output_tag(tag,"!-- WWWOFFLE (disable-scripts) - "," --"); else output_tag(tag,NULL,NULL); return(is_image?0: (disable_applet && is_java)?1: (disable_flash && is_flash)?2: (is_inline && disable_dontget_iframes && is_dontget)?3: (is_inline && disable_script && is_script)?3: 0); } /*++++++++++++++++++++++++++++++++++++++ Take the information for the link tag and parse it. const Tag *tag The tag information. int disable_style Set to true if stylesheets are disabled. int disable_script Set to true if scripts are disabled. ++++++++++++++++++++++++++++++++++++++*/ static void handle_link_tag(const Tag *tag,int disable_style,int disable_script) { int i; int is_stylesheet=0,is_script=0; for(i=0;inattr;i++) if(tag->attr_type[i]==att_rel && tag->attr_val[i]) { if(disable_style) { if(!strncasecmp(tag->attr_val[i],"Stylesheet",(size_t)10)) is_stylesheet=1; } if(disable_script) { if(!strncasecmp(tag->attr_val[i],"javascript:",(size_t)11)) is_script=1; } } /* Output the original or modified tag. 
*/ if(disable_style && is_stylesheet) output_tag(tag,"!-- WWWOFFLE (disable-style) - "," --"); else if(disable_script && is_script) output_tag(tag,"!-- WWWOFFLE (disable-script) - "," --"); else output_tag(tag,NULL,NULL); } /*++++++++++++++++++++++++++++++++++++++ Take the information for the style tag and parse it. int handle_style_script_tag Returns true if a script was detected. const Tag *tag The tag information. int disable_script Set to true if scripts are disabled. See "WhiteHat Security Advisory [Number: WH-08152001-1]" for the details of this. The HTML