Functino

My daily struggle with web development

Highlight search keywords like google cache does

If you have ever clicked on a Google cache link, you know their highlighting. Some time ago I wrote a prototype.js-based highlighting script to achieve the same. Whenever someone finds your site through a search engine, this script highlights the keywords they entered. By default only a handful of search engines are included, but you can add as many as you want.

It’s kind of a work in progress because I wrote it some time ago and I think there is room for improvement. But for now: feel free to use it. I would be happy if you gave me feedback.

Usage is simple, just get the keywords:

// get() returns the array of keywords extracted from the referrer
// (an empty array when no configured search engine matched).
var keywords = (new SearchKeywords()).get();

And then highlight them:

// Walks document.body and wraps every occurrence of each keyword in a highlight element.
var hi = new Highlighter();
hi.highlight(keywords);

Or as a one-liner:

// Extract the search keywords and highlight them in a single expression.
(new Highlighter()).highlight((new SearchKeywords()).get());

Why are these two classes? Because I wanted to be able to use them separately. You can use Highlighter to highlight anything – not just search terms – like so:

// A plain string is accepted as well; it is treated as one single term
// (the whole phrase "Hello World!" is searched for, it is not split into words).
var hi = new Highlighter();
hi.highlight("Hello World!");

So here’s the actual code. Just copy&paste it into a .js-file, include it in your pages and you can use it as described above. There are some more options which are described in the source code:

/**
 * You can use SearchKeywords() to recognize if a user came through a search engine (defined in DEFAULT_OPTIONS.engines)
 * and SearchKeywords#get() will return an array of entered keywords.
 * Example usage:
 * <code>
 * 	var sk = new SearchKeywords();
 * 	var keywords = sk.get(); // will return all keywords if the user came through a search engine
 * 
 * 	var sk2 = new SearchKeywords({'min_length':5});
 * 	var keywords2 = sk2.get(); // will return only keywords with a minimum length of 5 chars
 * </code>
 * For other options than min_length look at the DEFAULT_OPTIONS hash.
 */
var SearchKeywords = Class.create({
	/**
	 * Merges the given options over the defaults and immediately parses the referrer.
	 * @access public
	 * @param object/hash options - optional hash with options
	 * @return SearchKeywords
	 */	
	initialize: function(options){
		// Object.extend() writes into its first argument, so the defaults are copied into a
		// fresh object first. Extending DEFAULT_OPTIONS directly would leak the options of
		// one instance into every instance created afterwards.
		this.options = Object.extend(Object.extend({}, SearchKeywords.DEFAULT_OPTIONS), options || {});
		this.referrer = this.options.referrer || '';
		this.keywords = [];
		this._start();
	},
	/**
	 * Returns an array of all keywords (empty if the referrer was not a known search engine)
	 * @access public
	 * @return Array 
	 */
	get: function() {
		return this.keywords;
	},	
	/**
	 * Checks if the referrer is one of the configured search engines and, if so, parses the
	 * value of that engine's query parameter into this.keywords.
	 * Only the first engine whose pattern matches AND whose parameter is present is used, so a
	 * referrer that matches several patterns does not produce duplicated keywords.
	 * @access private
	 * @return void
	 */	
	_start: function(){
		var referrer = String(this.referrer || '');
		var queryStart = referrer.indexOf('?');
		if (queryStart === -1) {
			return;
		}
		// everything after the first "?" up to an optional "#fragment"
		var params = referrer.substring(queryStart + 1).split('#')[0].split('&');
		var engines = this.options.engines || [];
		for (var e = 0; e < engines.length; e++) {
			if (referrer.indexOf(engines[e].search) === -1) {
				continue;
			}
			var raw = this._findParameter(params, engines[e].parameter);
			if (raw !== null) {
				this._parse(this._decode(raw));
				return;
			}
		}
	},
	/**
	 * Looks up the raw (still encoded) value of a query parameter.
	 * @access private
	 * @param Array params - "name=value" strings
	 * @param String name - parameter name to look for
	 * @return String the raw value or null if the parameter is missing or has no "="
	 */
	_findParameter: function(params, name) {
		for (var k = 0; k < params.length; k++) {
			var separator = params[k].indexOf('=');
			if (separator !== -1 && params[k].substring(0, separator) === name) {
				return params[k].substring(separator + 1);
			}
		}
		return null;
	},
	/**
	 * Decodes a form-encoded query value ("+" means space, %XX escapes).
	 * decodeURIComponent is used because decodeURI leaves reserved characters such as
	 * %26 (&), %2B (+) or %3D (=) encoded.
	 * @access private
	 * @param String value - raw parameter value
	 * @return String decoded value; the un-decoded text if the escapes are malformed
	 */
	_decode: function(value) {
		var spaced = value.replace(/\+/g, ' ');
		try {
			return decodeURIComponent(spaced);
		}
		catch (e) {
			// Malformed escapes (e.g. a query that was not UTF-8 encoded) raise a URIError.
			// Fall back to the raw text instead of letting the constructor throw.
			return spaced;
		}
	},
	/**
	 * Splits the search query into separate keywords and appends the accepted ones to
	 * this.keywords. Double quotes are stripped; empty terms, terms shorter than
	 * options.min_length and terms listed in options.keyword_blacklist are dropped.
	 * @access private
	 * @param String searchterm - the decoded search query
	 * @return void
	 */		
	_parse: function(searchterm) {
		var terms = searchterm.replace(/"/g, '').split(/\s+/);
		var blacklist = this.options.keyword_blacklist || [];
		for (var t = 0; t < terms.length; t++) {
			var term = terms[t];
			if (term.length === 0 || term.length < this.options.min_length) {
				continue;
			}
			if (blacklist.include(term.toLowerCase())) {
				continue;
			}
			this.keywords.push(term);
		}
	}
});
SearchKeywords.DEFAULT_OPTIONS = {
	/**
	 * Here you can specify different search engines and the parameter that is used for keyword detection.
	 * "search" is matched as a plain substring of the referrer, so keep the patterns specific:
	 * a bare "ask" would also match unrelated URLs such as "http://example.com/tasks?q=...".
	 * Ask is therefore listed twice, once for sub-domains (www.ask.com) and once for the bare host.
	 */
	engines: [{'search': '.google.', 'parameter': 'q'},
	          {'search': 'search.yahoo.', 'parameter': 'p'},
	          {'search': '.bing.', 'parameter': 'q'},
	          {'search': 'altavista.', 'parameter': 'q'},
	          {'search': 'baidu.', 'parameter': 'wd'},
	          {'search': '.ask.', 'parameter': 'q'},
	          {'search': '//ask.', 'parameter': 'q'}
			],
	/**
	 * You can blacklist keywords like "and", "or" or "site:"
	 * All strings in this array will be ignored (compared against the lowercased keyword).
	 */
	keyword_blacklist: ["and", "or", "site:"],
	/**
	 * Keywords with a length smaller than min_length will be ignored
	 */
	min_length: 2,
	/**
	 * By default SearchKeywords uses the javascript referrer. For testing purposes you can set this
	 * to whatever you want
	 */	
	referrer: document.referrer
};
 
 
/**
 * Highlights all given terms/keywords like google does in its cache.
 * To do so, Highlighter walks the DOM below document.body and wraps every occurrence of a
 * keyword that it finds in a text node into a styled element.
 * 
 * Example usage:
 * <code>
 * 	var hi = new Highlighter();
 * 	hi.highlight("Hello World"); // highlights the text "Hello World"
 * 
 * 	var hi2 = new Highlighter({'class_names':false}); // doesn't add the numbered css classes now
 * 	hi2.highlight(["word1", "word2"]); // highlights word1 and word2
 * 
 * 	// You can use Highlighter with SearchKeywords like this:
 * 	var hi3 = new Highlighter();
 * 	hi3.highlight((new SearchKeywords()).get());
 * </code>
 *
 * There are a bunch of options to customize the behaviour of Highlighter. Take a look at DEFAULT_OPTIONS
 */
var Highlighter = Class.create({
	/**
	 * @access public
	 * @param object/hash options
	 * @return Highlighter
	 */
	initialize: function(options){
		this.counter = 0;
		// Object.extend() writes into its first argument, so the defaults are copied first.
		// Otherwise the options of one instance would change the defaults for all later ones.
		this.options = Object.extend(Object.extend({}, Highlighter.DEFAULT_OPTIONS), options || {});
	},
	/**
	 * Takes a string or an array of strings and highlights them.
	 * Entries that are not non-empty strings are ignored.
	 * @access public
	 * @param mixed terms - String or array of strings
	 * @return void
	 */
	highlight: function(terms) {
		if(typeof terms == "string") {
			terms = [terms];
		}
		var root = document.body;
		if(!terms || !root) {
			return;
		}

		// An empty term "matches" at position 0 forever and would hang the page, so drop it here.
		var usable = [];
		for(var n=0; n<terms.length; n++) {
			if(typeof terms[n] == "string" && terms[n].length > 0) {
				usable.push(terms[n]);
			}
		}

		var palette = this.options.styles;
		var classNames = this.options.class_names;
		var infoHashes = [];
		var styleHashes = [];
		for(var a=0; a<usable.length; a++) {
			var term = usable[a];
			styleHashes.push({
				'class': (classNames && classNames.length) ? classNames[this.counter % classNames.length] : "",
				'styles': (palette && palette.length) ? palette[this.counter % palette.length] : {}
			});

			// is our term included in one of the other, longer terms?
			// (compared the same way the text is searched, i.e. ignoring case unless case_sensitive is set)
			var infoHash = {'term':term, 'terms':[]};
			var needle = this._normalize(term);
			for(var b=0; b<usable.length; b++) {
				var other = this._normalize(usable[b]);
				if(other !== needle && other.indexOf(needle) !== -1) {
					infoHash.terms.push(usable[b]);
				}
			}
			infoHashes.push(infoHash);
			this.counter++;
		}

		// iterate over all terms and highlight them.
		// Terms that were not found do not consume a style, so the colors stay in sequence.
		var notFound = 0;
		for(var i=0; i<infoHashes.length; i++) {
			this.found = false;
			try {
				this._highlight(infoHashes[i], root, styleHashes[i - notFound]);
			}
			catch(e) {
				// Highlighting is best effort and must never break the page,
				// but the failure is reported instead of being swallowed silently.
				if(typeof console != "undefined" && console.error) {
					console.error('Highlighter: could not highlight "' + infoHashes[i].term + '"', e);
				}
			}

			// term not found
			if(!this.found) {
				notFound++;
			}
		}
	},
	/**
	 * Recursively walks the child nodes of container. Text nodes are searched for the term,
	 * element nodes are descended into unless they are blacklisted or already a highlight.
	 * All other node types (comments, processing instructions, ...) are skipped.
	 * @access private
	 * @param object/Hash termInfo hash with the keyword ('term') and the longer terms containing it ('terms')
	 * @param HtmlElement container
	 * @param object/Hash hash - {'class': css class name, 'styles': style hash} for this term
	 * @return void
	 */
	_highlight: function(termInfo, container, hash) {
		var needle = this._normalize(termInfo.term);
		if(needle.length === 0) {
			return;
		}
		// childNodes is live: a replaced text node leaves its wrapper at the same index,
		// so the loop simply continues behind it.
		for(var i=0; i<container.childNodes.length; i++) {
			var node = container.childNodes[i];
			if(node.nodeType === 3) {
				this._wrapMatches(node, termInfo, needle, hash);
			}
			else if(node.nodeType === 1) {
				if(this.options.element_blacklist.include(node.nodeName.toLowerCase())) {
					continue;
				}
				if(!$(node).hasClassName(this.options.class_name)) {
					this._highlight(termInfo, node, hash);
				}
			}
		}
	},
	/**
	 * Replaces a text node by a wrapper element in which every occurrence of the term is
	 * enclosed in a highlighted element. Does nothing if the term does not occur or if the
	 * text also contains one of the longer terms (that one is highlighted instead).
	 * The text is inserted as text nodes, never as markup, so page content such as
	 * "a < b" or displayed source code cannot be turned into HTML or executed.
	 * @access private
	 * @param Text node - the text node to search
	 * @param object/Hash termInfo
	 * @param String needle - the term, already normalized for comparison
	 * @param object/Hash hash
	 * @return void
	 */
	_wrapMatches: function(node, termInfo, needle, hash) {
		var data = node.data;
		var haystack = this._normalize(data);
		if(haystack.indexOf(needle) === -1) {
			return;
		}
		// if the term is embedded in a longer term - don't highlight it now
		var longer = termInfo.terms || [];
		for(var n=0; n<longer.length; n++) {
			if(haystack.indexOf(this._normalize(longer[n])) !== -1) {
				return;
			}
		}

		this.found = true;
		var length = termInfo.term.length;
		var wrapper = new Element(this.options.element);
		var position;
		// NOTE: positions found in the normalized text are applied to the original text; this
		// assumes lowercasing does not change the length of the string.
		while((position = haystack.indexOf(needle)) !== -1) {
			if(position > 0) {
				wrapper.appendChild(document.createTextNode(data.substr(0, position)));
			}
			var mark = new Element(this.options.element);
			mark.setStyle(hash.styles);
			if(hash['class']) {
				mark.addClassName(hash['class']);
			}
			mark.addClassName(this.options.class_name);
			mark.appendChild(document.createTextNode(data.substr(position, length)));
			wrapper.appendChild(mark);
			data = data.substr(position + length);
			haystack = this._normalize(data);
		}
		if(data.length > 0) {
			wrapper.appendChild(document.createTextNode(data));
		}
		node.parentNode.replaceChild(wrapper, node);
	},
	/**
	 * Returns the text in the form used for comparisons: unchanged when the case_sensitive
	 * option is set, lowercased otherwise.
	 * @access private
	 * @param String text
	 * @return String
	 */
	_normalize: function(text) {
		return this.options.case_sensitive ? text : text.toLowerCase();
	}
});
Highlighter.DEFAULT_OPTIONS = {
	/**
	 * These styles will be used for the highlighting. By default these are the "google colors".
	 * Each term gets the next entry; the list is reused from the start when there are more terms.
	 * You can set this option to false if you want to use the css class name based highlighting.
	 */
	styles: [{'backgroundColor':'#FFFF66', 'color':'#000'},{'backgroundColor':'#A0FFFF', 'color':'#000'}, {'backgroundColor':'#99FF99', 'color':'#000'}, {'backgroundColor':'#FF9999', 'color':'#000'}, {'backgroundColor':'#FF66FF', 'color':'#000'}],
	/**
	 * Array of classnames that are added to the found keywords (one per term, cycling like styles).
	 * Set to false to add no per-term class.
	 */
	class_names: ["highlighted_keyword_1", "highlighted_keyword_2", "highlighted_keyword_3"],
	/**
	 * This class is added to each found keyword. Elements carrying it are not parsed again.
	 */
	class_name: "highlighted_keyword",
	/**
	 * Blacklist of HTML elements that shouldn't get parsed.
	 * "textarea" is included because its content is plain text: replacing that text node
	 * by highlight elements would corrupt the content of the form field.
	 */
	element_blacklist: ["select", "script", "title", "link", "input", "style", "textarea"],
	/**
	 * All found keywords will be inserted in an html element. By default this is <spanhack>.
	 * I know, that's not a standard element. But this is used to not accidentally break a layout
	 * because "span" or "div" already have weird css definitions...
	 */
	element: 'spanhack',
	/**
	* Should the search be case sensitive? By default "hello" and "Hello" are considered the same.
	*/
	case_sensitive:false
};

Technically it just walks through the DOM – whenever it encounters a text node, it checks whether one of the given keywords is included. If so, it wraps the keyword in an element which is then highlighted via CSS.
Simple as that. If you have improvements just take the code and use/edit it as you want.

Leave a Reply