Jump to content

User:SD0001/parseTemplate/sandbox.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// <nowiki>
/**
 * Returns an array of objects representing the usages of a given set of 
 * templates in the given wikitext. The object key-value pairs are the template 
 * |parameter=value pairs.
 * 
 * Piped links, nested templates, nowiki tags and even HTML comments in parameter 
 * values are adequately accounted for. 
 * 
 * If resolveRedirects is set to true, transclusions of redirects to the
 * templates are also found. In that case the function returns a promise
 * instead of an array, and the result array is passed as a parameter to the
 * promise's done callback.
 * 
 * Usage: Can be executed from the browser console or within another script.
 *
 * @param {string} wikitext  Wikitext in which to search for the templates
 * 
 * @param {(string[]|string)} templates  Name of the template page, or array of 
 * template page names. Need not necessarily be pages in template namespace, 
 * any page can be used. If no namespace is provided, it is assumed as Template:.
 * If `resolveRedirects` is set, then there should be no more than 50 templates.
 * 
 * @param {boolean} [resolveRedirects=false]  Also check for transclusions of 
 * redirects of the specified `templates`?
 * 
 * @returns {Object[]}  If `resolveRedirects` is unset
 *			{Promise}   If `resolveRedirects` is set
 *						The result array is available as a parameter to the 
 *							promise done function.
 * 
 * @throws if  (i)  The end of template is not found in the wikitext
 *            (ii)  API call is unsuccessful (resolveRedirects mode only)
 * 
 * ISSUES:
 * 1. A template usage that lies entirely within a comment or nowiki tags may 
 *    still be found and reported.
 * 2. Very rare situations where, within a parameter value, there are nowiki tags 
 *    inside a comment, or vice-versa, will cause problems.
 * 
 * Found any other bug? Report at [[User talk:SD0001]] or via email.
 * 
 */
 
 /* jshint maxerr: 999 */

window.parseTemplate = function (wikitext, templates, resolveRedirects) {

	/**
	 * Returns a copy of `string` in which the single character at position
	 * `index` is replaced by `char`.
	 *
	 * @param {string} string  Text to copy
	 * @param {number} index  Position of the character to drop
	 * @param {string} char  Replacement inserted at that position
	 * @returns {string}
	 */
	var strReplaceAt = function (string, index, char) {
		var head = string.slice(0, index);
		var tail = string.slice(index + 1);
		return head + char + tail;
	};

	/**
	 * Builds a regex source fragment that matches a page name as it may be
	 * written in wikitext: the first character in either case, and every space
	 * or underscore interchangeable with the other.
	 *
	 * @param {string} name  Page name without namespace prefix
	 * @returns {string}  Regex source (not a RegExp object)
	 */
	var pageNameRegex = function (name) {
		var first = name.charAt(0);
		var upper = first.toUpperCase();
		var lower = first.toLowerCase();

		// Only cased letters need a two-way character class. Anything else is
		// emitted escaped: a leading '^' inside [..] would otherwise negate the
		// class and match every character except the caret.
		var firstPattern = upper === lower ?
			mw.util.escapeRegExp(first) :
			'[' + upper + lower + ']';

		return firstPattern +
			mw.util.escapeRegExp(name.slice(1)).replace(/ |_/g,'[ _]');
	};
	
	/**
	 * Builds a regex source fragment matching the namespace prefix (including
	 * the trailing colon) of the given namespace, using every name and alias
	 * listed in the wgNamespaceIds config.
	 *
	 * - Namespace 0 yields just ":" (the leading colon used to transclude a
	 *   main-namespace page).
	 * - Namespace 10 yields an optional prefix, because the namespace name may
	 *   be omitted when transcluding from template space.
	 *
	 * @param {number} namespaceNumber
	 * @returns {string}  Regex source (not a RegExp object)
	 */
	var namespaceRegex = function (namespaceNumber) {
		if (namespaceNumber === 0) {
			return ":";
		}

		var namespaceIds = mw.config.get('wgNamespaceIds');
		var alternatives = [];
		for ( var alias in namespaceIds ) {
			if ( namespaceIds[alias] !== namespaceNumber ) {
				continue;
			}
			var first = alias[0];
			if (first.toUpperCase() === first.toLowerCase()) {
				alternatives.push(alias);
			} else {
				// first letter may be written in either case
				alternatives.push('[' + first + first.toUpperCase() + ']' + alias.slice(1));
			}
		}

		// The alternation must be grouped. Otherwise the ':' binds only to the
		// last alias and the '|' leaks into whatever regex this is embedded in.
		var prefix = '(?:' + alternatives.join('|').replace(/_/g, '[ _]') + '):';

		if (namespaceNumber === 10) { // namespace name optional for template space
			return '(?:' + prefix + ')?';
		}
		return prefix;
	};

	var result = [];

	if (typeof templates === 'string') {
		templates = [ templates ];
	}

	/**
	 * Finds every usage of the page `t` as a template in the enclosing
	 * `wikitext` and pushes one parsed object per usage onto the enclosing
	 * `result` array. Key 0 of each object holds the template name as written;
	 * unnamed parameters are stored under 1, 2, ... and named parameters under
	 * their (trimmed) name.
	 *
	 * Pipes that must not act as parameter separators (inside nested templates,
	 * comments, nowiki tags and links) are temporarily replaced by the \x01
	 * control character in a private copy of the template text. The shared
	 * `wikitext` is never modified, so usages nested inside another template
	 * remain findable regardless of the order in which templates are processed.
	 *
	 * @param {string} t  Page name; a leading ':' denotes the main namespace,
	 * otherwise a name without namespace is taken to be in Template:.
	 * @throws {Error} if the closing }} of a usage is not found
	 */
	var processTemplate = function processtemplate(t) {

		// '\x01' rather than the legacy octal escape '\1', which is a syntax
		// error in strict-mode code.
		var PIPE_MASK = '\x01';
		var unmaskPipes = function (str) {
			return str.replace(/\x01/g, '|');
		};

		var mwTitle = new mw.Title(t);
		if (mwTitle.namespace === 0 && !t.startsWith(':')) {
			mwTitle.namespace = 10; // now its a template!
		}
		var re_string = namespaceRegex(mwTitle.namespace) + pageNameRegex(mwTitle.title);

		// group 1: '{{' plus the name as written; group 2: first '|' or the closing '}}'
		var t_re = new RegExp( '(\\{\\{\\s*' + re_string + '\\s*)(\\||\\}\\})', 'g');

		var match = t_re.exec(wikitext);
		while (match) {

			// Start scanning AT group 2, so that a usage without parameters
			// ('{{foo}}') is closed by its own braces.
			var startIdx = match.index + match[1].length;

			// number of unclosed braces
			var numUnclosed = 2;

			// are we inside a comment or between nowiki tags?
			var inCommentOrNowiki = false;

			// masked copy of the template body, assembled piecewise
			var pieces = [];
			var pieceStart = startIdx;

			var i, n = wikitext.length;

			for ( i = startIdx; i < n; i++ ) {
				var ch = wikitext[i];
				var maskPipe = false;

				if (! inCommentOrNowiki) {
					if (ch === '{' && wikitext[i+1] === '{') {
						numUnclosed += 2;
						i++;
					} else if (ch === '}' && wikitext[i+1] === '}') {
						numUnclosed -= 2;
						i++;
						if(numUnclosed === 0) {
							break;
						}
					} else if (ch === '|' && numUnclosed > 2) {
						// pipe belongs to a nested template
						maskPipe = true;
					} else if (ch === '<' && /^(<!--|<nowiki ?>)/.test(wikitext.slice(i, i + 9))) {
						inCommentOrNowiki = true;
						i += 3;
					}

				} else { // we are in a comment or nowiki
					if (ch === '|') {
						maskPipe = true;
					} else if ((ch === '-' || ch === '<') && /^(-->|<\/nowiki ?>)/.test(wikitext.slice(i, i + 10))) {
						inCommentOrNowiki = false;
						i += 2;
					}
				}

				if (maskPipe) {
					pieces.push(wikitext.slice(pieceStart, i), PIPE_MASK);
					pieceStart = i + 1;
				}
			}

			if (numUnclosed !== 0) {
				throw new Error('[parseTemplate] Failed to find closing }} of ' + t);
			}

			// i now points at the second brace of the closing }}
			pieces.push(wikitext.slice(pieceStart, i - 1));

			// text is the template text excluding the starting {{ and ending }}
			var text = match[1].slice(2) + pieces.join('');

			// swap out pipe in links with the mask character
			text = text.replace(/(\[\[[^\]]*?)\|(.*?\]\])/g, '$1\x01$2')
			// [[File: ]] can have multiple pipes, let's do this a couple of times more
				.replace(/(\[\[File:[^\]]*?)\|(.*?\]\])/g, '$1\x01$2')
				.replace(/(\[\[File:[^\]]*?)\|(.*?\]\])/g, '$1\x01$2');

			var chunks = text.split('|');
			var res = {};

			// name of the template as used in the wikitext is saved as 0th index of the object
			res[0] = chunks[0].trim();

			var unnamedIdx = 1;

			for (i=1; i < chunks.length; i++) {
				var indexOfEqualTo = chunks[i].indexOf('=');
				if (indexOfEqualTo === -1) {
					res[unnamedIdx++] = unmaskPipes(chunks[i]).trim();
				} else {
					var key = chunks[i].slice(0, indexOfEqualTo).trim();
					if (key.indexOf('{{') !== -1) {
						// the '=' sits inside a nested template: unnamed parameter
						res[unnamedIdx++] = unmaskPipes(chunks[i]).trim();
						continue;
					}
					var val = unmaskPipes(chunks[i].slice(indexOfEqualTo + 1)).trim();
					// masked pipes are restored in the name as well as the value
					res[unmaskPipes(key)] = val;
				}
			}

			result.push(res);

			match = t_re.exec(wikitext);
		}

	};

	templates.forEach(processTemplate);

	if (resolveRedirects === true || (resolveRedirects === 'ifNotFound' && result.length === 0)) {
		
		if (templates.length > 50) {
			console.log("ERROR: templates.length > 50");
		}
		
		/**
		 * Issues `query` through `mwApi.get` and keeps following the `continue`
		 * data of each response until a response carries none or `limit`
		 * requests have been made.
		 *
		 * @param {Object} mwApi  Object exposing get(params) that returns a promise
		 * @param {Object} query  Initial request parameters
		 * @param {number} [limit=10]  Maximum number of requests
		 * @returns {Promise}  Resolves with the array of all responses, in order
		 */
		var ApiQueryContinuous = function(mwApi, query, limit) {
			var maxRequests = limit || 10;
			var collected = [];

			var fetchBatch = function(params, requestNo) {
				return mwApi.get(params).then(function(data) {
					collected.push(data);

					var shouldContinue = data.continue && requestNo < maxRequests;
					if (!shouldContinue) {
						return collected;
					}
					// merge the continuation parameters into the next request
					var nextParams = $.extend({}, params, data.continue);
					return fetchBatch(nextParams, requestNo + 1);
				});
			};

			return fetchBatch(query, 1);
		};
		
		return ApiQueryContinuous(new mw.Api(), {
			"action": "query",
			"format": "json", 
			"prop": "linkshere",
			"titles": templates,
			"lhshow": "redirect",
			"lhlimit": "max"
		}).then(function(responses) {
			responses.forEach(function(response) {
				$.map(response.query.pages, function(page) {
					page.linkshere.forEach(function(item) {
						var templateRedir = item.title;
						if (item.ns === '0') {
							templateRedir = ':' + templateRedir;
						}
						processTemplate(templateRedir);
					});
				});
			});
			return result;
		}, function(err) {
			throw new Error('[parseTemplate] API call for getting redirects failed', err);
		});
		
		
	} else if (!resolveRedirects) {
		return result;
	} else {
		return $.Deferred().resolve(result);
	}

};
// </nowiki>