模块:Citation/CS1

来自维阿百科
跳转至: 导航搜索

  1.  
  -- Module-wide accumulator for everything collected while a citation is rendered.
  local z = {
      error_categories = {},    -- categories for citations that contain errors
      error_ids = {},           -- ids of the errors raised so far (error_id -> true)
      message_tail = {},        -- queued error messages, appended by the calling code
      maintenance_cats = {},    -- categories for citations that aren't erroneous per se, but could use a little work
      properties_cats = {},     -- categories based on certain properties, language of source for instance
  }
  9.  
  10. --[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
  11. ]]
  -- Forward declarations; these stay nil / empty until the supporting modules are loaded.
  local dates;              -- function in Module:Citation/CS1/Date_validation
  local year_date_check;    -- function in Module:Citation/CS1/Date_validation

  local cfg = {};           -- table of configuration tables that are defined in Module:Citation/CS1/Configuration
  local whitelist = {};     -- table of tables listing valid template parameter names; defined in Module:Citation/CS1/Whitelist
  16.  
  17. --[[--------------------------< I S _ S E T >------------------------------------------------------------------
  18.  
  19. Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) and is not an empty string.
  20. This function is global because it is called from both this module and from Date validation
  21.  
  22. ]]
  -- Report whether var holds a usable value: anything other than nil and the empty string is 'set'.
  -- Intentionally global (not local) so that other modules can call it.
  function is_set( var )
      return var ~= nil and var ~= '';
  end
  26.  
  27. --[[--------------------------< F I R S T _ S E T >------------------------------------------------------------
  28.  
  29. Locates and returns the first set value in a table of values where the order established in the table,
  30. left-to-right (or top-to-bottom), is the order in which the values are evaluated. Returns nil if none are set.
  31.  
  32. This version replaces the original 'for _, val in pairs do' and a similar version that used ipairs. With the pairs
  33. version the order of evaluation could not be guaranteed. With the ipairs version, a nil value would terminate
  34. the for-loop before it reached the actual end of the list.
  35.  
  36. ]]
  37.  
  -- Return the first member of list[1..count] that is set (see is_set); return nothing when none is.
  -- count is passed explicitly because list may contain nil holes, which make ipairs and # unreliable.
  local function first_set (list, count)
      for position = 1, count do                  -- visit every slot in order, holes included
          local candidate = list[position];
          if is_set( candidate ) then
              return candidate;                   -- first set member wins
          end
      end
  end
  47.  
  48. --[[--------------------------< I N _ A R R A Y >--------------------------------------------------------------
  49.  
  50. Whether needle is in haystack
  51.  
  52. ]]
  53.  
  -- Search the sequence haystack for needle.
  -- Returns the 1-based position of the first match, or false when needle is nil or absent.
  local function in_array( needle, haystack )
      if needle ~= nil then
          for position, candidate in ipairs( haystack ) do
              if candidate == needle then
                  return position;
              end
          end
      end
      return false;
  end
  65.  
  66. --[[--------------------------< S U B S T I T U T E >----------------------------------------------------------
  67.  
  68. Populates numbered arguments in a message string using an argument table.
  69.  
  70. ]]
  71.  
  -- Fill the numbered placeholders of msg from args via mw.message.
  -- When args is nil/false (or the expansion yields a false value) msg is returned untouched.
  local function substitute( msg, args )
      if not args then
          return msg;
      end
      return mw.message.newRawMessage( msg, args ):plain() or msg;
  end
  75.  
  76. --[[--------------------------< E R R O R _ C O M M E N T >----------------------------------------------------
  77.  
  78. Wraps error messages with css markup according to the state of hidden.
  79.  
  80. ]]
  -- Wrap an error message in the css markup selected by hidden:
  -- the 'hidden-error' presentation when hidden is truthy, otherwise 'visible-error'.
  local function error_comment( content, hidden )
      local wrapper;
      if hidden then
          -- fall back to the visible wrapper if no hidden one is configured
          wrapper = cfg.presentation['hidden-error'] or cfg.presentation['visible-error'];
      else
          wrapper = cfg.presentation['visible-error'];
      end
      return substitute( wrapper, content );
  end
  84.  
  85. --[[--------------------------< S E T _ E R R O R >--------------------------------------------------------------
  86.  
  87. Sets an error condition and returns the appropriate error message. The actual placement of the error message in the output is
  88. the responsibility of the calling function.
  89.  
  90. ]]
  -- Record the error condition error_id and build its message.
  --   error_id   key into cfg.error_conditions; an unknown key raises a Lua error
  --   arguments  values substituted into the condition's message text
  --   raw        when exactly true, return the bare message plus its hidden flag instead of css-wrapped text
  --   prefix, suffix  optional text placed around the message
  -- Returns: '', false when the message is suppressed; message, hidden when raw; else the wrapped message.
  -- Placement of the message in the output is left to the caller.
  local function set_error( error_id, arguments, raw, prefix, suffix )
      local condition = cfg.error_conditions[ error_id ];

      if condition == nil then
          error( cfg.messages['undefined_error'] );
      end
      if is_set( condition.category ) then
          table.insert( z.error_categories, condition.category );
      end

      local message = substitute( condition.message, arguments );
      message = message .. " ([[" .. cfg.messages['help page link'] ..
          "#" .. condition.anchor .. "|" ..
          cfg.messages['help page label'] .. "]])";

      z.error_ids[ error_id ] = true;             -- remember that this error has been raised

      -- these two errors are redundant once a missing-title error has been recorded
      local title_dependent = in_array( error_id, { 'bare_url_missing_title', 'trans_missing_title' } );
      if title_dependent and z.error_ids['citation_missing_title'] then
          return '', false;
      end

      message = (prefix or "") .. message .. (suffix or "");

      if raw == true then
          return message, condition.hidden;
      end
      return error_comment( message, condition.hidden );
  end
  115.  
  116. --[[--------------------------< A D D _ M A I N T _ C A T >------------------------------------------------------
  117.  
  118. Adds a category to z.maintenance_cats using names from the configuration file with additional text if any.
  119. To prevent duplication, the added_maint_cats table lists the categories by key that have been added to z.maintenance_cats.
  120.  
  121. ]]
  122.  
  local added_maint_cats = {}                     -- keys of the maintenance categories already added to z.maintenance_cats

  -- Add the maintenance category named by cfg.maint_cats[key] (expanded with arguments) once per key.
  local function add_maint_cat (key, arguments)
      if added_maint_cats [key] then
          return;                                 -- already added; avoid duplicates
      end
      added_maint_cats [key] = true;
      local category_name = substitute (cfg.maint_cats [key], arguments);
      table.insert( z.maintenance_cats, category_name );
  end
  130.  
  131. --[[--------------------------< A D D _ P R O P _ C A T >--------------------------------------------------------
  132.  
  133. Adds a category to z.properties_cats using names from the configuration file with additional text if any.
  134.  
  135. ]]
  136.  
  local added_prop_cats = {}                      -- keys of the property categories already added to z.properties_cats

  -- Add the property category named by cfg.prop_cats[key] (expanded with arguments) once per key.
  local function add_prop_cat (key, arguments)
      local already_added = added_prop_cats [key];
      added_prop_cats [key] = true;               -- whatever happens below, this key now counts as handled
      if not already_added then
          local category_name = substitute (cfg.prop_cats [key], arguments);
          table.insert( z.properties_cats, category_name );
      end
  end
  144.  
  145. --[[--------------------------< A D D _ V A N C _ E R R O R >----------------------------------------------------
  146.  
  147. Adds a single Vancouver system error message to the template's output regardless of how many errors actually exist.
  148. To prevent duplication, added_vanc_errs is nil until an error message is emitted.
  149.  
  150. ]]
  151.  
  local added_vanc_errs;                          -- nil until the single Vancouver error has been queued

  -- Queue the 'vancouver' error on z.message_tail, at most once.
  local function add_vanc_error ()
      if added_vanc_errs then
          return;
      end
      added_vanc_errs = true;
      local message, hidden = set_error( 'vancouver', {}, true );
      table.insert( z.message_tail, { message, hidden } );
  end
  159.  
  160.  
  161. --[[--------------------------< I S _ S C H E M E >------------------------------------------------------------
  162.  
  163. does this thing that purports to be a uri scheme seem to be a valid scheme? The scheme is checked to see if it
  164. is in agreement with http://tools.ietf.org/html/std66#section-3.1 which says:
  165. Scheme names consist of a sequence of characters beginning with a
  166. letter and followed by any combination of letters, digits, plus
  167. ("+"), period ("."), or hyphen ("-").
  168.  
  169. returns true if it does, else false
  170.  
  171. ]]
  172.  
  -- Test whether scheme looks like a uri scheme: a letter followed by any mix of letters, digits,
  -- '+', '.', or '-', terminated by a colon.
  -- Returns a real boolean: true on a match, false otherwise (including when scheme is nil or false).
  -- Previously this returned the matched text or nil, contrary to the documented contract.
  local function is_scheme (scheme)
      if not scheme then
          return false;
      end
      return scheme:match ('^%a[%a%d%+%.%-]*:') ~= nil;
  end
  176.  
  177.  
  178. --[=[-------------------------< I S _ D O M A I N _ N A M E >--------------------------------------------------
  179.  
  180. Does this thing that purports to be a domain name seem to be a valid domain name?
  181.  
  182. Syntax defined here: http://tools.ietf.org/html/rfc1034#section-3.5
  183. BNF defined here: https://tools.ietf.org/html/rfc4234
  184. Single character names are generally reserved; see https://tools.ietf.org/html/draft-ietf-dnsind-iana-dns-01#page-15;
  185. see also [[Single-letter second-level domain]]
  186. list of tlds: https://www.iana.org/domains/root/db
  187.  
  188. rfc952 (modified by rfc 1123) requires the first and last character of a hostname to be a letter or a digit. Between
  189. the first and last characters the name may use letters, digits, and the hyphen.
  190.  
  191. Also allowed are IPv4 addresses. IPv6 not supported
  192.  
  193. domain is expected to be stripped of any path so that the last character is the last character of the tld. tld
  194. is two or more alpha characters. Any preceding '//' (from splitting a url with a scheme) will be stripped
  195. here. Perhaps not necessary but retained in case it is necessary for IPv4 dot decimal.
  196.  
  197. There are several tests:
  198. the first character of the whole domain name including subdomains must be a letter or a digit
  199. single-letter/digit second-level domains in the .org TLD
  200. q, x, and z SL domains in the .com TLD
  201. i and q SL domains in the .net TLD
  202. single-letter SL domains in the ccTLDs (where the ccTLD is two letters)
  203. two-character SL domains in gTLDs (where the gTLD is two or more letters)
  204. three-plus-character SL domains in gTLDs (where the gTLD is two or more letters)
  205. IPv4 dot-decimal address format; TLD not allowed
  206.  
  207. returns true if domain appears to be a proper name and tld or IPv4 address, else false
  208.  
  209. ]=]
  210.  
  -- Test whether domain looks like a host name with a tld, or like an IPv4 address.
  -- A leading '//' is ignored. Returns true when any accepted shape matches, false otherwise
  -- (including when domain is nil/false or does not start with a letter or digit).
  local function is_domain_name (domain)
      if not domain then
          return false;                           -- nothing to test
      end

      local name = domain:gsub ('^//', '');       -- drop '//' once, up front
      if not name:match ('^[%a%d]') then          -- first character must be letter or digit
          return false;
      end

      local accepted_shapes = {
          '%f[%a%d][%a%d]%.org$',                                 -- one character .org hostname
          '%f[%a][qxz]%.com$',                                    -- assigned one character .com hostname
          '%f[%a][iq]%.net$',                                     -- assigned one character .net hostname
          '%f[%a%d][%a%d][%a%d%-]+[%a%d]%.xn%-%-[%a%d]+$',        -- internationalized domain name with ACE prefix
          '%f[%a%d][%a%d]%.cash$',                                -- one character/digit .cash hostname
          '%f[%a%d][%a%d]%.%a%a$',                                -- one character hostname and cctld (2 chars)
          '%f[%a%d][%a%d][%a%d]%.%a%a+$',                         -- two character hostname and tld
          '%f[%a%d][%a%d][%a%d%-]+[%a%d]%.%a%a+$',                -- three or more character hostname.hostname or hostname.tld
          '^%d%d?%d?%.%d%d?%d?%.%d%d?%d?%.%d%d?%d?',              -- IPv4 address
      };

      for _, shape in ipairs (accepted_shapes) do
          if name:match (shape) then
              return true;
          end
      end
      return false;
  end
  241.  
  242.  
  243. --[[--------------------------< I S _ U R L >------------------------------------------------------------------
  244.  
  245. returns true if the scheme and domain parts of a url appear to be a valid url; else false.
  246.  
  247. This function is the last step in the validation process. This function is separate because there are cases that
  248. are not covered by split_url(), for example is_parameter_ext_wikilink() which is looking for bracketed external
  249. wikilinks.
  250.  
  251. ]]
  252.  
  -- Final validation step for a split url.
  -- With no scheme (protocol relative url) only the domain is checked; otherwise both scheme and
  -- domain must pass their respective tests.
  local function is_url (scheme, domain)
      if not is_set (scheme) then
          return is_domain_name (domain);         -- protocol relative: the domain decides
      end
      return is_scheme (scheme) and is_domain_name (domain);
  end
  260.  
  261.  
  262. --[[--------------------------< S P L I T _ U R L >------------------------------------------------------------
  263.  
  264. Split a url into a scheme, authority indicator, and domain.
  265. If protocol relative url, return nil scheme and the domain; if url_str has neither a leading '//' nor a scheme, return nil for both scheme and domain.
  266.  
  267. When not protocol relative, get scheme, authority indicator, and domain. If there is an authority indicator (one
  268. or more '/' characters following the scheme's colon), make sure that there are only 2.
  269.  
  270. ]]
  271.  
  -- Split url_str into scheme and domain.
  -- Returns:
  --   nil, domain      for a protocol relative url ('//domain...')
  --   scheme, domain   for scheme + optional '//' + domain; a port that follows a letter is removed
  --   scheme           (domain omitted) when the slashes after the colon are not exactly zero or two
  --   nil, nil         when neither shape is recognised
  local function split_url (url_str)
      -- strip FQDN terminator and path(/), query(?), fragment(#); the capture protects the '//' after a scheme
      local stripped = url_str:gsub ('([%a%d])%.?[/%?#].*$', '%1');

      local relative_domain = stripped:match ('^//(%S*)');
      if relative_domain then                     -- protocol relative url
          return nil, relative_domain;
      end

      local scheme, slashes, domain = stripped:match ('(%S-:)(/*)(%S+)');
      if not scheme then                          -- nothing that looks like scheme:domain
          return nil, nil;
      end

      local leftover = slashes:gsub ('//', '', 1);    -- remove one pair of '/'
      if is_set (leftover) then                   -- 1 or 3+ '/' where the authority indicator should be
          return scheme;                          -- nil domain makes the caller report an error
      end

      return scheme, (domain:gsub ('(%a):%d+', '%1'));    -- strip port number if present
  end
  288.  
  289.  
  290. --[[--------------------------< L I N K _ P A R A M _ O K >---------------------------------------------------
  291.  
  292. checks the content of |title-link=, |series-link=, |author-link= etc for properly formatted content: no wikilinks, no urls
  293.  
  294. Link parameters are to hold the title of a wikipedia article so none of the WP:TITLESPECIALCHARACTERS are allowed:
  295. # < > [ ] | { } _
  296. except the underscore which is used as a space in wiki urls and # which is used for section links
  297.  
  298. returns false when the value contains any of these characters.
  299.  
  300. When there are no illegal characters, this function returns TRUE if value DOES NOT appear to be a valid url (the
  301. |<param>-link= parameter is ok); else false when value appears to be a valid url (the |<param>-link= parameter is NOT ok).
  302.  
  303. ]]
  304.  
  -- Validate the value of a |<param>-link= parameter.
  -- Returns false when value contains any of < > [ ] | { } or when it appears to be a valid url;
  -- returns true otherwise.
  local function link_param_ok (value)
      if value:find ('[<>%[%]|{}]') then          -- prohibited characters present
          return false;
      end
      return not is_url (split_url (value));      -- ok only when value does NOT look like a url
  end
  314.  
  315.  
  316. --[[--------------------------< C H E C K _ U R L >------------------------------------------------------------
  317.  
  318. Determines whether a URL string appears to be valid.
  319.  
  320. First we test for space characters. If any are found, return false. Then split the url into scheme and domain
  321. portions, or for protocol relative (//example.com) urls, just the domain. Use is_url() to validate the two
  322. portions of the url. If both are valid, or for protocol relative if domain is valid, return true, else false.
  323.  
  324. ]]
  325.  
  -- Decide whether url_str appears to be a valid url.
  -- A value that is empty or contains whitespace is rejected outright; otherwise the url is split
  -- into scheme and domain and judged by is_url().
  local function check_url( url_str )
      if not url_str:match ("^%S+$") then         -- spaces (or nothing at all) can't be a proper url
          return false;
      end
      return is_url (split_url (url_str));
  end
  335.  
  336.  
  337. --[=[-------------------------< I S _ P A R A M E T E R _ E X T _ W I K I L I N K >----------------------------
  338.  
  339. Return true if a parameter value has a string that begins and ends with square brackets [ and ] and the first
  340. non-space characters following the opening bracket appear to be a url. The test will also find external wikilinks
  341. that use protocol relative urls. Also finds bare urls.
  342.  
  343. The frontier pattern prevents a match on interwiki links which are similar to scheme:path urls. The tests that
  344. find bracketed urls are required because the parameters that call this test (currently |title=, |chapter=, |work=,
  345. and |publisher=) may have wikilinks and there are articles or redirects like '//Hus' so, while uncommon, |title=[[//Hus]]
  346. is possible as might be [[en://Hus]].
  347.  
  348. ]=]
  349.  
  -- Detect an external link inside a parameter value.
  -- Four shapes are tried in order: bracketed link with scheme, bracketed protocol relative link,
  -- bare url with scheme, bare protocol relative url. The first shape that matches supplies the
  -- scheme/domain handed to is_url(); when none matches the function returns false.
  local function is_parameter_ext_wikilink (value)
      -- strip path information (the capture prevents false replacement of '//')
      local text = value:gsub ('([^%s/])/[%a%d].*', '%1');

      -- ext wikilink with scheme and domain: [xxxx://yyyyy.zzz]
      local scheme, domain = text:match ('%f[%[]%[(%a%S*:)(%S+).*%]');
      if not scheme then
          -- protocol relative ext wikilink: [//yyyyy.zzz]
          domain = text:match ('%f[%[]%[//(%S*%.%S+).*%]');
          if not domain then
              -- bare url with scheme; may have leading or trailing plain text
              scheme, domain = text:match ('(%a%S*:)(%S+)');
              if not scheme then
                  -- protocol relative bare url: //yyyyy.zzz; may have leading or trailing plain text
                  domain = text:match ('//(%S*%.%S+)');
                  if not domain then
                      return false;               -- nothing that is obviously a url
                  end
              end
          end
      end

      return is_url (scheme, domain);
  end
  369.  
  370.  
  371. --[[-------------------------< C H E C K _ F O R _ U R L >-----------------------------------------------------
  372.  
  373. loop through a list of parameters and their values. Look at the value and if it has an external link, emit an error message.
  374.  
  375. ]]
  376.  
  -- Inspect every parameter value in parameter_list (name -> value) and, when at least one value
  -- contains an external link, queue a single 'param_has_ext_link' error on z.message_tail naming
  -- all offending parameters as '&#124;name=' joined by ', '.
  -- The names are sorted before they are reported: pairs() visits keys in an unspecified order, so
  -- without the sort the same citation could produce differently ordered messages.
  local function check_for_url (parameter_list)
      local offenders = {};
      for name, value in pairs (parameter_list) do
          if is_parameter_ext_wikilink (value) then
              offenders[#offenders + 1] = name;
          end
      end

      if #offenders == 0 then
          return;                                 -- no external links found, nothing to report
      end

      -- compare as strings so that a stray non-string key cannot break the sort
      table.sort (offenders, function (a, b) return tostring (a) < tostring (b) end);
      for position, name in ipairs (offenders) do
          offenders[position] = "&#124;" .. name .. "=";
      end

      table.insert( z.message_tail, { set_error( 'param_has_ext_link', { table.concat (offenders, ", ") }, true ) } );
  end
  391.  
  392.  
  393. --[[--------------------------< S A F E _ F O R _ I T A L I C S >----------------------------------------------
  394.  
  395. Protects a string that will be wrapped in wiki italic markup '' ... ''
  396.  
  397. Note: We cannot use <i> for italics, as the expected behavior for italics specified by ''...'' in the title is that
  398. they will be inverted (i.e. unitalicized) in the resulting references. In addition, <i> and '' tend to interact
  399. poorly under Mediawiki's HTML tidy.
  400.  
  401. ]]
  402.  
  -- Protect a string that will be wrapped in wiki italic markup '' ... ''.
  -- An unset str (nil or '') is returned unchanged. Otherwise an empty <span></span> is placed next
  -- to a leading and/or trailing apostrophe so it cannot merge with the italic markup, and
  -- newlines are replaced by spaces because they break italics.
  -- Returns exactly one value: the gsub call is parenthesized so that its replacement count does
  -- not leak to the caller as a second return value.
  local function safe_for_italics( str )
      if not is_set(str) then
          return str;
      end
      if str:sub(1,1) == "'" then str = "<span></span>" .. str; end
      if str:sub(-1,-1) == "'" then str = str .. "<span></span>"; end
      return (str:gsub( '\n', ' ' ));
  end
  413.  
  414. --[[--------------------------< S A F E _ F O R _ U R L >------------------------------------------------------
  415.  
  416. Escape sequences for content that will be used for URL descriptions
  417.  
  418. ]]
  419.  
  -- Escape text that will be used as the label of an external link.
  -- '[' and ']' become '&#91;' and '&#93;', newlines become spaces. When str contains a wikilink
  -- ([[...]]) a 'wikilink_in_url' error is queued on z.message_tail first.
  -- Returns exactly one value: the gsub call is parenthesized so that its replacement count does
  -- not leak to the caller as a second return value.
  local function safe_for_url( str )
      if str:match( "%[%[.-%]%]" ) ~= nil then
          table.insert( z.message_tail, { set_error( 'wikilink_in_url', {}, true ) } );
      end
      return (str:gsub( '[%[%]\n]', {
          ['['] = '&#91;',
          [']'] = '&#93;',
          ['\n'] = ' ' } ));
  end
  429.  
  430. --[[--------------------------< W R A P _ S T Y L E >----------------------------------------------------------
  431.  
  432. Applies styling to various parameters. Supplied string is wrapped using a message_list configuration taking one
  433. argument; protects italic styled parameters. Additional text taken from citation_config.presentation - the reason
  434. this function is similar to but separate from wrap_msg().
  435.  
  436. ]]
  437.  
  -- Wrap str in the presentation markup stored at cfg.presentation[key].
  -- Returns "" when str is not set. For the two italic title keys the string is first made safe
  -- for italic markup.
  local function wrap_style (key, str)
      if not is_set( str ) then
          return "";
      end

      if key == 'italic-title' or key == 'trans-italic-title' then
          str = safe_for_italics( str );
      end

      return substitute( cfg.presentation[key], {str} );
  end
  447.  
  448. --[[--------------------------< E X T E R N A L _ L I N K >----------------------------------------------------
  449.  
  450. Format an external link with error checking
  451.  
  452. ]]
  453.  
  -- Build a wiki external link '[URL label]' followed by any error messages.
  --   URL     link target; a 'bad_url' error is appended when check_url() rejects it
  --   label   link text; when not set the URL itself is used and a 'bare_url_missing_title' error is
  --           reported against source
  --   source  name of the parameter that supplied URL; must be set when label is not, otherwise a
  --           Lua error is raised
  local function external_link( URL, label, source )
      local error_str = "";

      if not is_set( label ) then
          if not is_set( source ) then
              error( cfg.messages["bare_url_no_origin"] );
          end
          label = URL;
          error_str = set_error( 'bare_url_missing_title', { wrap_style ('parameter', source) }, false, " " );
      end

      if not check_url( URL ) then
          -- the bad-url message goes in front of any missing-title message
          error_str = set_error( 'bad_url', {wrap_style ('parameter', source)}, false, " " ) .. error_str;
      end

      return "[" .. URL .. " " .. safe_for_url( label ) .. "]" .. error_str;
  end
  469.  
  470. --[[--------------------------< E X T E R N A L _ L I N K _ I D >----------------------------------------------
  471.  
  472. Formats a wiki style external link
  473.  
  474. ]]
  475.  
  -- Format a wiki style external link for an identifier.
  -- Fields read from options:
  --   id         the identifier; shown (nowiki'd) as the visible text and used to build the url
  --   encode     the id is url-encoded unless this is set to something other than true/nil
  --   prefix     text placed before the id in the url
  --   suffix     optional text placed after the id in the url
  --   link       used as the title attribute of the wrapping <span>
  --   label      text shown before the id
  --   separator  text between label and id; defaults to "&nbsp;"
  -- The format string is written without backslash escapes: '\<', '\=', '\>' and '\/' are not valid
  -- escape sequences and are rejected by Lua 5.2 and later; the resulting text is unchanged.
  local function external_link_id(options)
      local url_string = options.id;
      if options.encode == true or options.encode == nil then
          url_string = mw.uri.encode( url_string );
      end
      return mw.ustring.format( '[%s%s%s <span title="%s">%s%s%s</span>]',
          options.prefix, url_string, options.suffix or "",
          options.link, options.label, options.separator or "&nbsp;",
          mw.text.nowiki(options.id)
      );
  end
  487.  
  488. --[[--------------------------< D E P R E C A T E D _ P A R A M E T E R >--------------------------------------
  489.  
  490. Categorize and emit an error message when the citation contains one or more deprecated parameters. The function includes the
  491. offending parameter name to the error message. Only one error message is emitted regardless of the number of deprecated
  492. parameters in the citation.
  493.  
  494. ]]
  495.  
  local page_in_deprecated_cat;                   -- sticky flag so that the error / category is added only once

  -- Queue a 'deprecated_params' error naming the deprecated parameter `name`.
  -- Only the first call has any effect; later calls are ignored.
  local function deprecated_parameter(name)
      if page_in_deprecated_cat then
          return;
      end
      page_in_deprecated_cat = true;
      local message, hidden = set_error( 'deprecated_params', {name}, true );
      table.insert( z.message_tail, { message, hidden } );
  end
  503.  
  504. --[[--------------------------< K E R N _ Q U O T E S >--------------------------------------------------------
  505.  
  506. Apply kerning to open the space between the quote mark provided by the Module and a leading or trailing quote mark contained in a |title= or |chapter= parameter's value.
  507. This function will positive kern either single or double quotes:
  508. "'Unkerned title with leading and trailing single quote marks'"
  509. " 'Kerned title with leading and trailing single quote marks' " (in real life the kerning isn't as wide as this example)
  510. Double single quotes (italic or bold wikimarkup) are not kerned.
  511.  
  512. Call this function for chapter titles, for website titles, etc; not for book titles.
  513.  
  514. ]]
  515.  
  -- Apply kerning markup to a leading and/or trailing single or double quote mark in str so that it
  -- does not collide with the quote marks the module adds itself. Doubled single quotes (italic or
  -- bold wiki markup) are left alone. Returns the possibly rewritten string.
  local function kern_quotes (str)
      -- leading double or single quote, but not double single quotes
      local leading_quote, remainder = str:match ("^([\"\'])([^\'].+)");
      if is_set (leading_quote) then
          str = substitute (cfg.presentation['kern-left'], {leading_quote, remainder});
      end

      -- trailing double or single quote, but not double single quotes
      local body, trailing_quote = str:match ("^(.+[^\'])([\"\'])$");
      if is_set (body) then
          str = substitute (cfg.presentation['kern-right'], {body, trailing_quote});
      end

      return str;
  end
  530.  
  531. --[[--------------------------< F O R M A T _ S C R I P T _ V A L U E >----------------------------------------
  532.  
  533. |script-title= holds title parameters that are not written in Latin based scripts: Chinese, Japanese, Arabic, Hebrew, etc. These scripts should
  534. not be italicized and may be written right-to-left. The value supplied by |script-title= is concatenated onto Title after Title has been wrapped
  535. in italic markup.
  536.  
  537. Regardless of language, all values provided by |script-title= are wrapped in <bdi>...</bdi> tags to isolate rtl languages from the English left to right.
  538.  
  539. |script-title= provides a unique feature. The value in |script-title= may be prefixed with a two-character ISO639-1 language code and a colon:
  540. |script-title=ja:*** *** (where * represents a Japanese character)
  541. Spaces between the two-character code and the colon and the colon and the first script character are allowed:
  542. |script-title=ja : *** ***
  543. |script-title=ja: *** ***
  544. |script-title=ja :*** ***
  545. Spaces preceding the prefix are allowed: |script-title = ja:*** ***
  546.  
  547. The prefix is checked for validity. If it is a valid ISO639-1 language code, the lang attribute (lang="ja") is added to the <bdi> tag so that browsers can
  548. know the language the tag contains. This may help the browser render the script more correctly. If the prefix is invalid, the lang attribute
  549. is not added. At this time there is no error message for this condition.
  550.  
  551. Supports |script-title= and |script-chapter=
  552.  
  553. TODO: error messages when prefix is invalid ISO639-1 code; when script_value has prefix but no script;
  554. ]]
  555.  
  -- Format the value of |script-title= / |script-chapter=.
  -- An optional two-lowercase-letter language prefix followed by a colon ('ja:', 'ja : ') is
  -- recognised. When mw.language knows a name for the prefix, the prefix is removed, a property
  -- category is added and a lang attribute is generated; an unknown prefix is left in place with
  -- no lang attribute. A value that consists of the prefix only yields ''. The remaining text is
  -- wrapped in '-{R|' ... '}-' and then in the 'bdi' presentation markup.
  local function format_script_value (script_value)
      local lang_attribute = '';                  -- stays empty unless a known language prefix is found

      if script_value:match('^%l%l%s*:') then     -- value starts with something shaped like a language prefix
          local code = script_value:match('^(%l%l)%s*:%s*%S.*');     -- nil when no script follows the prefix
          if not is_set (code) then
              return '';                          -- script_value was just the prefix
          end

          -- language name, used both to validate the prefix and to categorize
          local name = mw.language.fetchLanguageName( code, mw.getContentLanguage():getCode() );
          if is_set (name) then
              script_value = script_value:gsub ('^%l%l%s*:%s*', '');     -- strip prefix from script
              -- languages that get their own named category
              if in_array (code, {'ar', 'bg', 'bs', 'dv', 'el', 'fa', 'he', 'hy', 'ja', 'ka', 'ko', 'ku', 'mk', 'ps', 'ru', 'sd', 'sr', 'th', 'uk', 'ug', 'yi', 'zh'}) then
                  add_prop_cat ('script_with_name', {name, code})
              else
                  add_prop_cat ('script')
              end
              lang_attribute = ' lang="' .. code .. '" ';
          end
      end

      if is_set(script_value) then
          script_value = '-{R|' .. script_value .. '}-';
      end

      return substitute (cfg.presentation['bdi'], {lang_attribute, script_value});    -- isolate in case script is rtl
  end
  586.  
  587. --[[--------------------------< S C R I P T _ C O N C A T E N A T E >------------------------------------------
  588.  
  589. Initially for |title= and |script-title=, this function concatenates those two parameter values after the script value has been
  590. wrapped in <bdi> tags.
  591. ]]
  592.  
  -- Concatenate a title with its formatted script counterpart (initially |title= + |script-title=).
  -- script is run through format_script_value(); when that yields something, it is appended to title
  -- after a single space. A nil title is treated as the empty string in that case instead of raising
  -- a concatenation error. When script contributes nothing, title is returned as it came in (apart
  -- from the string coercion below).
  local function script_concatenate (title, script)
      if is_set(title) then
          title = '' .. title .. '';              -- no-op for strings; coerces a numeric title to a string
      end
      if is_set (script) then
          script = format_script_value (script);  -- <bdi> tags, lang attribute, categorization, etc; empty string on error
          if is_set (script) then
              title = (title or '') .. ' ' .. script;
          end
      end
      return title;
  end
  605.  
  606.  
  607. --[[--------------------------< W R A P _ M S G >--------------------------------------------------------------
  608.  
  609. Applies additional message text to various parameter values. Supplied string is wrapped using a message_list
  610. configuration taking one argument. Supports lower case text for {{citation}} templates. Additional text taken
  611. from citation_config.messages - the reason this function is similar to but separate from wrap_style().
  612.  
  613. ]]
  614.  
  -- Wrap str in the message text stored at cfg.messages[key].
  -- Returns "" when str is not set. When lower is exactly true the message text is lower-cased
  -- before str is substituted into it.
  local function wrap_msg (key, str, lower)
      if not is_set( str ) then
          return "";
      end

      local template = cfg.messages[key];
      if lower == true then
          template = template:lower();            -- lower-case the message, not the substituted text
      end
      return substitute( template, str );
  end
  627.  
  628.  
  629. --[[-------------------------< I S _ A L I A S _ U S E D >-----------------------------------------------------
  630.  
  631. This function is used by select_one() to determine if one of a list of alias parameters is in the argument list
  632. provided by the template.
  633.  
  634. Input:
  635. args – pointer to the arguments table from calling template
  636. alias – one of the list of possible aliases in the aliases lists from Module:Citation/CS1/Configuration
  637. index – for enumerated parameters, identifies which one
  638. enumerated – true/false flag used choose how enumerated aliases are examined
  639. value – value associated with an alias that has previously been selected; nil if not yet selected
  640. selected – the alias that has previously been selected; nil if not yet selected
  641. error_list – list of aliases that are duplicates of the alias already selected
  642.  
  643. Returns:
  644. value – value associated with alias we selected or that was previously selected or nil if an alias not yet selected
  645. selected – the alias we selected or the alias that was previously selected or nil if an alias not yet selected
  646.  
  647. ]]
  648.  
  -- Check whether one alias of a parameter is present in args and update the running selection.
  --   args        argument table from the calling template
  --   alias       alias name; a '#' marks where the enumerator goes
  --   index       enumerator substituted for '#' when enumerated is true
  --   enumerated  when false the '#' is simply removed
  --   value, selected  the value / alias chosen so far (value is nil when nothing is chosen yet)
  --   error_list  receives aliases that duplicate the one already selected (each at most once)
  -- Returns the (possibly new) value and selected alias.
  local function is_alias_used (args, alias, index, enumerated, value, selected, error_list)
      local enumerator = '';
      if enumerated then
          enumerator = index;
      end
      alias = alias:gsub ('#', enumerator);       -- put the enumerator in place of '#', or drop the '#'

      if not is_set(args[alias]) then
          return value, selected;                 -- alias not used in the template; selection unchanged
      end

      if value == nil or selected == alias then
          return args[alias], alias;              -- nothing selected yet (or same alias again): select this one
      end

      -- a different alias was selected earlier: record this one as a duplicate, once
      for _, listed in ipairs(error_list) do
          if listed == alias then
              return value, selected;             -- already recorded
          end
      end
      table.insert( error_list, alias );
      return value, selected;
  end
  675.  
  676.  
  677. --[[--------------------------< S E L E C T _ O N E >----------------------------------------------------------
  678.  
  679. Chooses one matching parameter from a list of parameters to consider. The list of parameters to consider is just
  680. names. For parameters that may be enumerated, the position of the numerator in the parameter name is identified
  681. by the '#' so |author-last1= and |author1-last= are represented as 'author-last#' and 'author#-last'.
  682.  
  683. Because enumerated parameter |<param>1= is an alias of |<param>= we must test for both possibilities.
  684.  
  685.  
  686. Generates an error if more than one match is present.
  687.  
  688. ]]
  689.  
  690. local function select_one( args, aliases_list, error_condition, index )
  691. local value = nil; -- the value assigned to the selected parameter
  692. local selected = ''; -- the name of the parameter we have chosen
  693. local error_list = {};
  694.  
  695. if index ~= nil then index = tostring(index); end
  696.  
  697. for _, alias in ipairs( aliases_list ) do -- for each alias in the aliases list
  698. if alias:match ('#') then -- if this alias can be enumerated
  699. if '1' == index then -- when index is 1 test for enumerated and non-enumerated aliases
  700. value, selected = is_alias_used (args, alias, index, false, value, selected, error_list); -- first test for non-enumerated alias
  701. end
  702. value, selected = is_alias_used (args, alias, index, true, value, selected, error_list); -- test for enumerated alias
  703. else
  704. value, selected = is_alias_used (args, alias, index, false, value, selected, error_list); --test for non-enumerated alias
  705. end
  706. end
  707.  
  708. if #error_list > 0 and 'none' ~= error_condition then -- for cases where this code is used outside of extract_names()
  709. local error_str = "";
  710. for _, k in ipairs( error_list ) do
  711. if error_str ~= "" then error_str = error_str .. cfg.messages['parameter-separator'] end
  712. error_str = error_str .. wrap_style ('parameter', k);
  713. end
  714. if #error_list > 1 then
  715. error_str = error_str .. cfg.messages['parameter-final-separator'];
  716. else
  717. error_str = error_str .. cfg.messages['parameter-pair-separator'];
  718. end
  719. error_str = error_str .. wrap_style ('parameter', selected);
  720. table.insert( z.message_tail, { set_error( error_condition, {error_str}, true ) } );
  721. end
  722. return value, selected;
  723. end
  724.  
  725.  
  726. --[[--------------------------< F O R M A T _ C H A P T E R _ T I T L E >--------------------------------------
  727.  
  728. Format the four chapter parameters: |script-chapter=, |chapter=, |trans-chapter=, and |chapter-url= into a single Chapter meta-
  729. parameter (chapter_url_source used for error messages).
  730.  
  731. ]]
  732.  
  733. local function format_chapter_title (scriptchapter, chapter, transchapter, chapterurl, chapter_url_source, no_quotes)
  734. local chapter_error = '';
  735. if not is_set (chapter) then
  736. chapter = ''; -- to be safe for concatenation
  737. else
  738. if false == no_quotes then
  739. chapter = kern_quotes (chapter); -- if necessary, separate chapter title's leading and trailing quote marks from Module provided quote marks
  740. chapter = wrap_style ('quoted-title', chapter);
  741. end
  742. end
  743.  
  744. chapter = script_concatenate (chapter, scriptchapter) -- <bdi> tags, lang atribute, categorization, etc; must be done after title is wrapped
  745.  
  746. if is_set (transchapter) then
  747. transchapter = wrap_style ('trans-quoted-title', transchapter);
  748. if is_set (chapter) then
  749. chapter = chapter .. ' ' .. transchapter;
  750. else -- here when transchapter without chapter or script-chapter
  751. chapter = transchapter; --
  752. chapter_error = ' ' .. set_error ('trans_missing_title', {'chapter'});
  753. end
  754. end
  755.  
  756. if is_set (chapterurl) then
  757. chapter = external_link (chapterurl, chapter, chapter_url_source); -- adds bare_url_missing_title error if appropriate
  758. end
  759.  
  760. return chapter .. chapter_error;
  761. end
  762.  
  763. --[[--------------------------< H A S _ I N V I S I B L E _ C H A R S >----------------------------------------
  764.  
  765. This function searches a parameter's value for nonprintable or invisible characters. The search stops at the
  766. first match.
  767.  
  768. This function will detect the visible replacement character when it is part of the wikisource.
  769.  
  770. Detects but ignores nowiki and math stripmarkers. Also detects other named stripmarkers (gallery, math, pre, ref)
  771. and identifies them with a slightly different error message. See also coins_cleanup().
  772.  
  773. Detects but ignores the character pattern that results from the transclusion of {{'}} templates.
  774.  
  775. Output of this function is an error message that identifies the character or the Unicode group, or the stripmarker
  776. that was detected along with its position (or, for multi-byte characters, the position of its first byte) in the
  777. parameter value.
  778.  
  779. ]]
  780.  
  781. local function has_invisible_chars (param, v)
  782. local position = ''; -- position of invisible char or starting position of stripmarker
  783. local dummy; -- end of matching string; not used but required to hold end position when a capture is returned
  784. local capture; -- used by stripmarker detection to hold name of the stripmarker
  785. local i=1;
  786. local stripmarker, apostrophe;
  787.  
  788. while cfg.invisible_chars[i] do
  789. local char=cfg.invisible_chars[i][1] -- the character or group name
  790. local pattern=cfg.invisible_chars[i][2] -- the pattern used to find it
  791. position, dummy, capture = mw.ustring.find (v, pattern) -- see if the parameter value contains characters that match the pattern
  792. if position then
  793. if 'nowiki' == capture or 'math' == capture or -- nowiki and math stripmarkers (not an error condition)
  794. ('templatestyles' == capture) then -- templatestyles stripmarker allowed
  795. stripmarker = true; -- set a flag
  796. elseif true == stripmarker and 'delete' == char then -- because stripmakers begin and end with the delete char, assume that we've found one end of a stripmarker
  797. position = nil; -- unset
  798. elseif 'apostrophe' == char then -- apostrophe template uses &zwj;, hair space and zero-width space
  799. apostrophe = true;
  800. elseif true == apostrophe and in_array (char, {'zero width joiner', 'zero width space', 'hair space'}) then
  801. position = nil; -- unset
  802. else
  803. local err_msg;
  804. if capture then
  805. err_msg = capture .. ' ' .. cfg.invisible_chars[i][3] or char;
  806. else
  807. err_msg = cfg.invisible_chars[i][3] or (char .. ' character');
  808. end
  809.  
  810. table.insert( z.message_tail, { set_error( 'invisible_char', {err_msg, wrap_style ('parameter', param), position}, true ) } ); -- add error message
  811. return; -- and done with this parameter
  812. end
  813. end
  814. i=i+1; -- bump our index
  815. end
  816. end
  817.  
  818.  
  819. --[[--------------------------< A R G U M E N T _ W R A P P E R >----------------------------------------------
  820.  
  821. Argument wrapper. This function provides support for argument mapping defined in the configuration file so that
  822. multiple names can be transparently aliased to single internal variable.
  823.  
  824. ]]
  825.  
  826. local function argument_wrapper( args )
  827. local origin = {};
  828. return setmetatable({
  829. ORIGIN = function( self, k )
  830. local dummy = self[k]; --force the variable to be loaded.
  831. return origin[k];
  832. end
  833. },
  834. {
  835. __index = function ( tbl, k )
  836. if origin[k] ~= nil then
  837. return nil;
  838. end
  839. local args, list, v = args, cfg.aliases[k];
  840. if type( list ) == 'table' then
  841. v, origin[k] = select_one( args, list, 'redundant_parameters' );
  842. if origin[k] == nil then
  843. origin[k] = ''; -- Empty string, not nil
  844. end
  845. elseif list ~= nil then
  846. v, origin[k] = args[list], list;
  847. else
  848. -- maybe let through instead of raising an error?
  849. -- v, origin[k] = args[k], k;
  850. error( cfg.messages['unknown_argument_map'] );
  851. end
  852. -- Empty strings, not nil;
  853. if v == nil then
  854. v = cfg.defaults[k] or '';
  855. origin[k] = '';
  856. end
  857. tbl = rawset( tbl, k, v );
  858. return v;
  859. end,
  860. });
  861. end
  862.  
  863. --[[--------------------------< V A L I D A T E >--------------------------------------------------------------
  864. Looks for a parameter's name in the whitelist.
  865.  
  866. Parameters in the whitelist can have three values:
  867. true - active, supported parameters
  868. false - deprecated, supported parameters
  869. nil - unsupported parameters
  870. ]]
  871.  
  872. local function validate( name )
  873. local name = tostring( name );
  874. local state = whitelist.basic_arguments[ name ];
  875. -- Normal arguments
  876. if true == state then return true; end -- valid actively supported parameter
  877. if false == state then
  878. deprecated_parameter (name); -- parameter is deprecated but still supported
  879. return true;
  880. end
  881. -- Arguments with numbers in them
  882. name = name:gsub( "%d+", "#" ); -- replace digit(s) with # (last25 becomes last#
  883. state = whitelist.numbered_arguments[ name ];
  884. if true == state then return true; end -- valid actively supported parameter
  885. if false == state then
  886. deprecated_parameter (name); -- parameter is deprecated but still supported
  887. return true;
  888. end
  889. return false; -- Not supported because not found or name is set to nil
  890. end
  891.  
  892.  
  893. -- Formats a wiki style internal link
  894. local function internal_link_id(options)
  895. return mw.ustring.format( '[[%s%s%s|\<span title\=\"%s\"\>%s\<\/span\>%s%s]]',
  896. options.prefix, options.id, options.suffix or "",
  897. options.link, options.label, options.separator or "&nbsp;",
  898. mw.text.nowiki(options.id)
  899. );
  900. end
  901.  
  902. local function tidy_date(date)
  903. if date:match("^%d%d%d%d%-%d%d?%-%d%d?$") then
  904. local y, m, d = date:match("(%d%d%d%d)%-(%d%d?)%-(%d%d?)")
  905. return y..'-'..string.format('%02d', m)..'-'..string.format('%02d', d)
  906. else
  907. return date
  908. end
  909. end
  910. --[[--------------------------< N O W R A P _ D A T E >--------------------------------------------------------
  911.  
  912. When date is YYYY-MM-DD format wrap in nowrap span: <span ...>YYYY-MM-DD</span>. When date is DD MMMM YYYY or is
  913. MMMM DD, YYYY then wrap in nowrap span: <span ...>DD MMMM</span> YYYY or <span ...>MMMM DD,</span> YYYY
  914.  
  915. DOES NOT yet support MMMM YYYY or any of the date ranges.
  916.  
  917. ]]
  918.  
  919. local function nowrap_date (date)
  920. local cap='';
  921. local cap2='';
  922.  
  923. if date:match("^%d%d%d%d%-%d%d%-%d%d$") then
  924. date = substitute (cfg.presentation['nowrap1'], date);
  925. elseif date:match("^%a+%s*%d%d?,%s+%d%d%d%d$") or date:match ("^%d%d?%s*%a+%s+%d%d%d%d$") then
  926. cap, cap2 = string.match (date, "^(.*)%s+(%d%d%d%d)$");
  927. date = substitute (cfg.presentation['nowrap2'], {cap, cap2});
  928. end
  929. return date;
  930. end
  931.  
  932. --[[--------------------------< IS _ V A L I D _ I S X N >-----------------------------------------------------
  933.  
  934. ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit. ISBN-13 is checked in check_isbn().
  935. If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length and stripped of dashes,
  936. spaces and other non-isxn characters.
  937.  
  938. ]]
  939.  
  940. local function is_valid_isxn (isxn_str, len)
  941. local temp = 0;
  942. isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58
  943. len = len+1; -- adjust to be a loop counter
  944. for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum
  945. if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58)
  946. temp = temp + 10*( len - i ); -- it represents 10 decimal
  947. else
  948. temp = temp + tonumber( string.char(v) )*(len-i);
  949. end
  950. end
  951. return temp % 11 == 0; -- returns true if calculation result is zero
  952. end
  953.  
  954.  
  955. --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------
  956.  
  957. ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit.
  958. If the number is valid, the result will be 0. Before calling this function, isbn-13/ismn must be checked for length
  959. and stripped of dashes, spaces and other non-isxn-13 characters.
  960.  
  961. ]]
  962.  
  963. local function is_valid_isxn_13 (isxn_str)
  964. local temp=0;
  965. isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39
  966. for i, v in ipairs( isxn_str ) do
  967. temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit
  968. end
  969. return temp % 10 == 0; -- sum modulo 10 is zero when isbn-13/ismn is correct
  970. end
  971.  
  972. --[[--------------------------< C H E C K _ I S B N >------------------------------------------------------------
  973.  
  974. Determines whether an ISBN string is valid
  975.  
  976. ]]
  977.  
  978. local function check_isbn( isbn_str )
  979. if nil ~= isbn_str:match("[^%s-0-9X]") then return false; end -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X
  980. isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces
  981. local len = isbn_str:len();
  982. if len ~= 10 and len ~= 13 then
  983. return false;
  984. end
  985.  
  986. if len == 10 then
  987. if isbn_str:match( "^%d*X?$" ) == nil then return false; end
  988. return is_valid_isxn(isbn_str, 10);
  989. else
  990. local temp = 0;
  991. if isbn_str:match( "^97[89]%d*$" ) == nil then return false; end -- isbn13 begins with 978 or 979; ismn begins with 979
  992. return is_valid_isxn_13 (isbn_str);
  993. end
  994. end
  995.  
  996. --[[--------------------------< C H E C K _ I S M N >------------------------------------------------------------
  997.  
  998. Determines whether an ISMN string is valid. Similar to isbn-13, ismn is 13 digits beginning 979-0-... and uses the
  999. same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf
  1000. section 2, pages 9–12.
  1001.  
  1002. ]]
  1003.  
  1004. local function ismn (id)
  1005. local handler = cfg.id_handlers['ISMN'];
  1006. local text;
  1007. local valid_ismn = true;
  1008.  
  1009. id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn
  1010.  
  1011. if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ismn must be 13 digits and begin 9790
  1012. valid_ismn = false;
  1013. else
  1014. valid_ismn=is_valid_isxn_13 (id); -- validate ismn
  1015. end
  1016.  
  1017. -- text = internal_link_id({link = handler.link, label = handler.label, -- use this (or external version) when there is some place to link to
  1018. -- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
  1019. text="[[" .. handler.link .. "|" .. handler.label .. "]]" .. handler.separator .. id; -- because no place to link to yet
  1020.  
  1021. if false == valid_ismn then
  1022. text = text .. ' ' .. set_error( 'bad_ismn' ) -- add an error message if the issn is invalid
  1023. end
  1024. return text;
  1025. end
  1026.  
  1027. --[[--------------------------< I S S N >----------------------------------------------------------------------
  1028.  
  1029. Validate and format an issn. This code fixes the case where an editor has included an ISSN in the citation but has separated the two groups of four
  1030. digits with a space. When that condition occurred, the resulting link looked like this:
  1031.  
  1032. |issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327] -- can't have spaces in an external link
  1033. This code now prevents that by inserting a hyphen at the issn midpoint. It also validates the issn for length and makes sure that the checkdigit agrees
  1034. with the calculated value. Incorrect length (8 digits), characters other than 0-9 and X, or checkdigit / calculated value mismatch will all cause a check issn
  1035. error message. The issn is always displayed with a hyphen, even if the issn was given as a single group of 8 digits.
  1036.  
  1037. ]]
  1038.  
  1039. local function issn(id, e)
  1040. local issn_copy = id; -- save a copy of unadulterated issn; use this version for display if issn does not validate
  1041. local handler;
  1042. local text;
  1043. local valid_issn = true;
  1044. if e then
  1045. handler = cfg.id_handlers['EISSN'];
  1046. else
  1047. handler = cfg.id_handlers['ISSN'];
  1048. end
  1049.  
  1050. id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the issn
  1051.  
  1052. if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then -- validate the issn: 8 digits long, containing only 0-9 or X in the last position
  1053. valid_issn=false; -- wrong length or improper character
  1054. else
  1055. valid_issn=is_valid_isxn(id, 8); -- validate issn
  1056. end
  1057.  
  1058. if true == valid_issn then
  1059. id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 ); -- if valid, display correctly formatted version
  1060. else
  1061. id = issn_copy; -- if not valid, use the show the invalid issn with error message
  1062. end
  1063. text = external_link_id({link = handler.link, label = handler.label,
  1064. prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
  1065. if false == valid_issn then
  1066. text = text .. ' ' .. set_error( 'bad_issn' ) -- add an error message if the issn is invalid
  1067. end
  1068. return text
  1069. end
  1070.  
  1071. --[[--------------------------< A M A Z O N >------------------------------------------------------------------
  1072.  
  1073. Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha
  1074. characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit
  1075. isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=.
  1076. Error message if not 10 characters, if not isbn10, if mixed and first character is a digit.
  1077.  
  1078. ]]
  1079.  
  1080. local function amazon(id, domain)
  1081. local err_cat = ""
  1082.  
  1083. if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then
  1084. err_cat = ' ' .. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters
  1085. else
  1086. if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X)
  1087. if check_isbn( id ) then -- see if asin value is isbn10
  1088. add_maint_cat ('ASIN');
  1089. elseif not is_set (err_cat) then
  1090. err_cat = ' ' .. set_error ('bad_asin'); -- asin is not isbn10
  1091. end
  1092. elseif not id:match("^%u[%d%u]+$") then
  1093. err_cat = ' ' .. set_error ('bad_asin'); -- asin doesn't begin with uppercase alpha
  1094. end
  1095. end
  1096. if not is_set(domain) then
  1097. domain = "com";
  1098. elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom
  1099. domain = "co." .. domain;
  1100. elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico
  1101. domain = "com." .. domain;
  1102. end
  1103. local handler = cfg.id_handlers['ASIN'];
  1104. return external_link_id({link=handler.link,
  1105. label=handler.label, prefix=handler.prefix .. domain .. "/dp/",
  1106. id=id, encode=handler.encode, separator = handler.separator}) .. err_cat;
  1107. end
  1108.  
  1109. --[[--------------------------< A R X I V >--------------------------------------------------------------------
  1110.  
  1111. See: http://arxiv.org/help/arxiv_identifier
  1112.  
  1113. format and error check arXiv identifier. There are three valid forms of the identifier:
  1114. the first form, valid only between date codes 9108 and 0703 is:
  1115. arXiv:<archive>.<class>/<date code><number><version>
  1116. where:
  1117. <archive> is a string of alpha characters - may be hyphenated; no other punctuation
  1118. <class> is a string of alpha characters - may be hyphenated; no other punctuation
  1119. <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
  1120. first digit of YY for this form can only be 9 or 0
  1121. <number> is a three-digit number
  1122. <version> is a 1 or more digit number preceded with a lowercase v; no spaces (undocumented)
  1123. the second form, valid from April 2007 through December 2014 is:
  1124. arXiv:<date code>.<number><version>
  1125. where:
  1126. <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01
  1127. <number> is a four-digit number
  1128. <version> is a 1 or more digit number preceded with a lowercase v; no spaces
  1129.  
  1130. the third form, valid from January 2015 is:
  1131. arXiv:<date code>.<number><version>
  1132. where:
  1133. <date code> and <version> are as defined for 0704-1412
  1134. <number> is a five-digit number
  1135. ]]
  1136.  
  1137. local function arxiv (id, class)
  1138. local handler = cfg.id_handlers['ARXIV'];
  1139. local year, month, version;
  1140. local err_cat = '';
  1141. local text;
  1142. if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version
  1143. year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$");
  1144. year = tonumber(year);
  1145. month = tonumber(month);
  1146. if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month
  1147. ((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok?
  1148. err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
  1149. end
  1150. elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version
  1151. year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$");
  1152. year = tonumber(year);
  1153. month = tonumber(month);
  1154. if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years)
  1155. ((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)?
  1156. err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
  1157. end
  1158. elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version
  1159. year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$");
  1160. year = tonumber(year);
  1161. month = tonumber(month);
  1162. if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years)
  1163. err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message
  1164. end
  1165. else
  1166. err_cat = ' ' .. set_error( 'bad_arxiv' ); -- arXiv id doesn't match any format
  1167. end
  1168.  
  1169. text = external_link_id({link = handler.link, label = handler.label,
  1170. prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
  1171.  
  1172. if is_set (class) then
  1173. class = ' [[' .. '//arxiv.org/archive/' .. class .. ' ' .. class .. ']]'; -- external link within square brackets, not wikilink
  1174. else
  1175. class = ''; -- empty string for concatenation
  1176. end
  1177. return text .. class;
  1178. end
  1179.  
  1180. --[[
  1181. lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization)
  1182. 1. Remove all blanks.
  1183. 2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash.
  1184. 3. If there is a hyphen in the string:
  1185. a. Remove it.
  1186. b. Inspect the substring following (to the right of) the (removed) hyphen. Then (and assuming that steps 1 and 2 have been carried out):
  1187. 1. All these characters should be digits, and there should be six or less. (not done in this function)
  1188. 2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six.
  1189.  
  1190. Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function.
  1191. ]]
  1192.  
  1193. local function normalize_lccn (lccn)
  1194. lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace
  1195.  
  1196. if nil ~= string.find (lccn,'/') then
  1197. lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it
  1198. end
  1199.  
  1200. local prefix
  1201. local suffix
  1202. prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix
  1203.  
  1204. if nil ~= suffix then -- if there was a hyphen
  1205. suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6
  1206. lccn=prefix..suffix; -- reassemble the lccn
  1207. end
  1208. return lccn;
  1209. end
  1210.  
  1211. --[[
  1212. Format LCCN link and do simple error checking. LCCN is a character string 8-12 characters long. The length of the LCCN dictates the character type of the first 1-3 characters; the
  1213. rightmost eight are always digits. http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
  1214.  
  1215. length = 8 then all digits
  1216. length = 9 then lccn[1] is lower case alpha
  1217. length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
  1218. length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
  1219. length = 12 then lccn[1] and lccn[2] are both lower case alpha
  1220.  
  1221. ]]
  1222.  
  1223. local function lccn(lccn)
  1224. local handler = cfg.id_handlers['LCCN'];
  1225. local err_cat = ''; -- presume that LCCN is valid
  1226. local id = lccn; -- local copy of the lccn
  1227.  
  1228. id = normalize_lccn (id); -- get canonical form (no whitespace, hyphens, forward slashes)
  1229. local len = id:len(); -- get the length of the lccn
  1230.  
  1231. if 8 == len then
  1232. if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits)
  1233. err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
  1234. end
  1235. elseif 9 == len then -- LCCN should be adddddddd
  1236. if nil == id:match("%l%d%d%d%d%d%d%d%d") then -- does it match our pattern?
  1237. err_cat = ' ' .. set_error( 'bad_lccn' ); -- set an error message
  1238. end
  1239. elseif 10 == len then -- LCCN should be aadddddddd or dddddddddd
  1240. if id:match("[^%d]") then -- if LCCN has anything but digits (nil if only digits) ...
  1241. if nil == id:match("^%l%l%d%d%d%d%d%d%d%d") then -- ... see if it matches our pattern
  1242. err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
  1243. end
  1244. end
  1245. elseif 11 == len then -- LCCN should be aaadddddddd or adddddddddd
  1246. if not (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d")) then -- see if it matches one of our patterns
  1247. err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
  1248. end
  1249. elseif 12 == len then -- LCCN should be aadddddddddd
  1250. if not id:match("^%l%l%d%d%d%d%d%d%d%d%d%d") then -- see if it matches our pattern
  1251. err_cat = ' ' .. set_error( 'bad_lccn' ); -- no match, set an error message
  1252. end
  1253. else
  1254. err_cat = ' ' .. set_error( 'bad_lccn' ); -- wrong length, set an error message
  1255. end
  1256.  
  1257. if not is_set (err_cat) and nil ~= lccn:find ('%s') then
  1258. err_cat = ' ' .. set_error( 'bad_lccn' ); -- lccn contains a space, set an error message
  1259. end
  1260.  
  1261. return external_link_id({link = handler.link, label = handler.label,
  1262. prefix=handler.prefix,id=lccn,separator=handler.separator, encode=handler.encode}) .. err_cat;
  1263. end
  1264.  
  1265. --[[--------------------------< P M I D >----------------------------------------------------------------------
  1266.  
  1267. Format PMID and do simple error checking. PMIDs are sequential numbers beginning at 1 and counting up. This
  1268. code checks the PMID to see that it contains only digits and is less than test_limit; the value in local variable
  1269. test_limit will need to be updated periodically as more PMIDs are issued.
  1270.  
  1271. ]]
  1272.  
  1273. local function pmid(id)
  1274. local test_limit = 33000000; -- update this value as PMIDs approach
  1275. local handler = cfg.id_handlers['PMID'];
  1276. local err_cat = ''; -- presume that PMID is valid
  1277. if id:match("[^%d]") then -- if PMID has anything but digits
  1278. err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
  1279. else -- PMID is only digits
  1280. local id_num = tonumber(id); -- convert id to a number for range testing
  1281. if 1 > id_num or test_limit < id_num then -- if PMID is outside test limit boundaries
  1282. err_cat = ' ' .. set_error( 'bad_pmid' ); -- set an error message
  1283. end
  1284. end
  1285. return external_link_id({link = handler.link, label = handler.label,
  1286. prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
  1287. end
  1288.  
  1289. --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------
  1290.  
  1291. Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is
  1292. in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because
  1293. |embargo= was not set in this cite.
  1294.  
  1295. ]]
  1296.  
  1297. local function is_embargoed (embargo)
  1298. if is_set (embargo) then
  1299. local lang = mw.getContentLanguage();
  1300. local good1, embargo_date, good2, todays_date;
  1301. good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo );
  1302. good2, todays_date = pcall( lang.formatDate, lang, 'U' );
  1303. if good1 and good2 then -- if embargo date and today's date are good dates
  1304. if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date is in the future?
  1305. return embargo; -- still embargoed
  1306. else
  1307. add_maint_cat ('embargo')
  1308. return ''; -- unset because embargo has expired
  1309. end
  1310. end
  1311. end
  1312. return ''; -- |embargo= not set return empty string
  1313. end
  1314.  
  1315. --[[--------------------------< P M C >------------------------------------------------------------------------
  1316.  
  1317. Format a PMC, do simple error checking, and check for embargoed articles.
  1318.  
  1319. The embargo parameter takes a date for a value. If the embargo date is in the future the PMC identifier will not
  1320. be linked to the article. If the embargo date is today or in the past, or if it is empty or omitted, then the
  1321. PMC identifier is linked to the article through the link at cfg.id_handlers['PMC'].prefix.
  1322.  
  1323. PMC embargo date testing is done in function is_embargoed () which is called earlier because when the citation
  1324. has |pmc=<value> but does not have a |url= then |title= is linked with the PMC link. Function is_embargoed ()
  1325. returns the embargo date if the PMC article is still embargoed, otherwise it returns an empty string.
  1326.  
  1327. PMCs are sequential numbers beginning at 1 and counting up. This code checks the PMC to see that it contains only digits and is less
  1328. than test_limit; the value in local variable test_limit will need to be updated periodically as more PMCs are issued.
  1329.  
  1330. ]]
  1331.  
  1332. local function pmc(id, embargo)
  1333. local test_limit = 7000000; -- update this value as PMCs approach
  1334. local handler = cfg.id_handlers['PMC'];
  1335. local err_cat = ''; -- presume that PMC is valid
  1336. local id_num;
  1337. local text;
  1338. id_num = id:match ('^[Pp][Mm][Cc](%d+)$'); -- identifier with pmc prefix
  1339.  
  1340. if is_set (id_num) then
  1341. add_maint_cat ('pmc_format');
  1342. else -- plain number without pmc prefix
  1343. id_num = id:match ('^%d+$'); -- if here id is all digits
  1344. end
  1345.  
  1346. if is_set (id_num) then -- id_num has a value so test it
  1347. id_num = tonumber(id_num); -- convert id_num to a number for range testing
  1348. if 1 > id_num or test_limit < id_num then -- if PMC is outside test limit boundaries
  1349. err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
  1350. else
  1351. id = tostring (id_num); -- make sure id is a string
  1352. end
  1353. else -- when id format incorrect
  1354. err_cat = ' ' .. set_error( 'bad_pmc' ); -- set an error message
  1355. end
  1356. if is_set (embargo) then -- is PMC is still embargoed?
  1357. text = table.concat ( -- still embargoed so no external link
  1358. {
  1359. make_wikilink (handler.link, handler.label),
  1360. handler.separator,
  1361. id,
  1362. err_cat
  1363. });
  1364. else
  1365. text = external_link_id({link = handler.link, label = handler.label, -- no embargo date or embargo has expired, ok to link to article
  1366. prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat;
  1367. end
  1368. return text;
  1369. end
  1370.  
  1371. --[[--------------------------< D O I >------------------------------------------------------------------------
  1372.  
  1373. Formats a DOI and checks for DOI errors.
  1374.  
  1375. DOI names contain two parts: prefix and suffix separated by a forward slash.
  1376. Prefix: directory indicator '10.' followed by a registrant code
  1377. Suffix: character string of any length chosen by the registrant
  1378.  
  1379. This function checks a DOI name for: prefix/suffix. If the doi name contains spaces or endashes, or, if it ends
  1380. with a period or a comma, this function will emit a bad_doi error message.
  1381.  
  1382. DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash,
  1383. and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
  1384. if ever used in doi names.
  1385.  
  1386. ]]
  1387.  
  1388. local function doi(id, inactive)
  1389. local cat = ""
  1390. local handler = cfg.id_handlers['DOI'];
  1391. local text;
  1392. if is_set(inactive) then
  1393. local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date
  1394. if is_set(inactive_year) then
  1395. table.insert( z.error_categories, "自" .. inactive_year .. "年含有不活躍DOI的頁面" );
  1396. else
  1397. table.insert( z.error_categories, "含有不活躍DOI的頁面" ); -- when inactive doesn't contain a recognizable year
  1398. end
  1399. inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")"
  1400. end
  1401. text = external_link_id({link = handler.link, label = handler.label,
  1402. prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. (inactive or '')
  1403.  
  1404. if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma
  1405. cat = ' ' .. set_error( 'bad_doi' );
  1406. end
  1407.  
  1408. return text .. cat
  1409. end
  1410.  
  1411.  
  1412. --[[--------------------------< H D L >------------------------------------------------------------------------
  1413.  
  1414. Formats an HDL with minor error checking.
  1415.  
  1416. HDL names contain two parts: prefix and suffix separated by a forward slash.
  1417. Prefix: character string using any character in the UCS-2 character set except '/'
  1418. Suffix: character string of any length using any character in the UCS-2 character set chosen by the registrant
  1419.  
  1420. This function checks a HDL name for: prefix/suffix. If the HDL name contains spaces, endashes, or, if it ends
  1421. with a period or a comma, this function will emit a bad_hdl error message.
  1422.  
  1423. HDL names are case-insensitive and can incorporate any printable Unicode characters so the test for endashes and
  1424. terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely
  1425. if ever used in HDLs.
  1426.  
  1427. ]]
  1428.  
  1429. local function hdl(id)
  1430. local handler = cfg.id_handlers['HDL'];
  1431. local text = external_link_id({link = handler.link, label = handler.label,
  1432. prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
  1433.  
  1434. if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma
  1435. text = text .. ' ' .. set_error( 'bad_hdl' );
  1436. end
  1437. return text;
  1438. end
  1439.  
  1440. --[[--------------------------< O P E N L I B R A R Y >--------------------------------------------------------
  1441.  
  1442. Formats an OpenLibrary link, and checks for associated errors.
  1443.  
  1444. ]]
  1445. local function openlibrary(id)
  1446. local code = id:match("^%d+([AMW])$"); -- only digits followed by 'A', 'M', or 'W'
  1447. local handler = cfg.id_handlers['OL'];
  1448.  
  1449. if ( code == "A" ) then
  1450. return external_link_id({link=handler.link, label=handler.label,
  1451. prefix=handler.prefix .. 'authors/OL',
  1452. id=id, separator=handler.separator, encode = handler.encode})
  1453. elseif ( code == "M" ) then
  1454. return external_link_id({link=handler.link, label=handler.label,
  1455. prefix=handler.prefix .. 'books/OL',
  1456. id=id, separator=handler.separator, encode = handler.encode})
  1457. elseif ( code == "W" ) then
  1458. return external_link_id({link=handler.link, label=handler.label,
  1459. prefix=handler.prefix .. 'works/OL',
  1460. id=id, separator=handler.separator, encode = handler.encode})
  1461. else
  1462. return external_link_id({link=handler.link, label=handler.label,
  1463. prefix=handler.prefix .. 'OL',
  1464. id=id, separator=handler.separator, encode = handler.encode}) .. ' ' .. set_error( 'bad_ol' );
  1465. end
  1466. end
  1467.  
  1468.  
  1469. --[[--------------------------< M E S S A G E _ I D >----------------------------------------------------------
  1470.  
  1471. Validate and format a usenet message id. Simple error checking, looks for 'id-left@id-right' not enclosed in
  1472. '<' and/or '>' angle brackets.
  1473.  
  1474. ]]
  1475.  
  1476. local function message_id (id)
  1477. local handler = cfg.id_handlers['USENETID'];
  1478.  
  1479. text = external_link_id({link = handler.link, label = handler.label,
  1480. prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode})
  1481. if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$')then -- doesn't have '@' or has one or first or last character is '< or '>'
  1482. text = text .. ' ' .. set_error( 'bad_message_id' ) -- add an error message if the message id is invalid
  1483. end
  1484. return text
  1485. end
  1486.  
  1487. --[[--------------------------< S E T _ T I T L E T Y P E >----------------------------------------------------
  1488.  
  1489. This function sets default title types (equivalent to the citation including |type=<default value>) for those templates that have defaults.
  1490. Also handles the special case where it is desirable to omit the title type from the rendered citation (|type=none).
  1491.  
  1492. ]]
  1493.  
  1494. local function set_titletype (cite_class, title_type)
  1495. if is_set(title_type) then
  1496. if "none" == title_type then
  1497. title_type = ""; -- if |type=none then type parameter not displayed
  1498. end
  1499. return title_type; -- if |type= has been set to any other value use that value
  1500. end
  1501.  
  1502. return cfg.title_types [cite_class] or ''; -- set template's default title type; else empty string for concatenation
  1503. end
  1504.  
  1505. --[[--------------------------< C L E A N _ I S B N >----------------------------------------------------------
  1506.  
  1507. Removes irrelevant text and dashes from ISBN number
  1508. Similar to that used for Special:BookSources
  1509.  
  1510. ]]
  1511.  
  1512. local function clean_isbn( isbn_str )
  1513. return isbn_str:gsub( "[^-0-9X]", "" );
  1514. end
  1515.  
  1516. --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------
  1517.  
  1518. Returns a string where all of lua's magic characters have been escaped. This is important because functions like
  1519. string.gsub() treat their pattern and replace strings as patterns, not literal strings.
  1520. ]]
  1521. local function escape_lua_magic_chars (argument)
  1522. argument = argument:gsub("%%", "%%%%"); -- replace % with %%
  1523. argument = argument:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1"); -- replace all other lua magic pattern characters
  1524. return argument;
  1525. end
  1526.  
  1527. --[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >--------------------------------
  1528.  
  1529. Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata.
  1530. This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to
  1531. markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind.
  1532.  
  1533. ]]
  1534.  
  1535. local function strip_apostrophe_markup (argument)
  1536. if not is_set (argument) then return argument; end
  1537.  
  1538. while true do
  1539. if argument:match ("%'%'%'%'%'") then -- bold italic (5)
  1540. argument=argument:gsub("%'%'%'%'%'", ""); -- remove all instances of it
  1541. elseif argument:match ("%'%'%'%'") then -- italic start and end without content (4)
  1542. argument=argument:gsub("%'%'%'%'", "");
  1543. elseif argument:match ("%'%'%'") then -- bold (3)
  1544. argument=argument:gsub("%'%'%'", "");
  1545. elseif argument:match ("%'%'") then -- italic (2)
  1546. argument=argument:gsub("%'%'", "");
  1547. else
  1548. break;
  1549. end
  1550. end
  1551. return argument; -- done
  1552. end
  1553.  
  1554. --[[--------------------------< M A K E _ C O I N S _ T I T L E >----------------------------------------------
  1555.  
  1556. Makes a title for COinS from Title and / or ScriptTitle (or any other name-script pairs)
  1557.  
  1558. Apostrophe markup (bold, italics) is stripped from each value so that the COinS metadata isn't corrupted with strings
  1559. of %27%27...
  1560. ]]
  1561.  
  1562. local function make_coins_title (title, script)
  1563. if is_set (title) then
  1564. title = strip_apostrophe_markup (title); -- strip any apostrophe markup
  1565. else
  1566. title=''; -- if not set, make sure title is an empty string
  1567. end
  1568. if is_set (script) then
  1569. script = script:gsub ('^%l%l%s*:%s*', ''); -- remove language prefix if present (script value may now be empty string)
  1570. script = strip_apostrophe_markup (script); -- strip any apostrophe markup
  1571. else
  1572. script=''; -- if not set, make sure script is an empty string
  1573. end
  1574. if is_set (title) and is_set (script) then
  1575. script = ' ' .. script; -- add a space before we concatenate
  1576. end
  1577. return title .. script; -- return the concatenation
  1578. end
  1579.  
  1580. --[[--------------------------< G E T _ C O I N S _ P A G E S >------------------------------------------------
  1581.  
  1582. Extract page numbers from external wikilinks in any of the |page=, |pages=, or |at= parameters for use in COinS.
  1583.  
  1584. ]]
  1585.  
  1586. local function get_coins_pages (pages)
  1587. local pattern;
  1588. if not is_set (pages) then return pages; end -- if no page numbers then we're done
  1589. while true do
  1590. pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url "
  1591. if nil == pattern then break; end -- no more urls
  1592. pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters
  1593. pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible
  1594. end
  1595. pages = pages:gsub("[%[%]]", ""); -- remove the brackets
  1596. pages = pages:gsub("–", "-" ); -- replace endashes with hyphens
  1597. pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (&ndash; etc.) with hyphens; do we need to replace numerical entities like &#32; and the like?
  1598. return pages;
  1599. end
  1600.  
  1601. -- Gets the display text for a wikilink like [[A|B]] or [[B]] gives B
  1602. local function remove_wiki_link( str )
  1603. return (str:gsub( "%[%[([^%[%]]*)%]%]", function(l)
  1604. return l:gsub( "^[^|]*|(.*)$", "%1" ):gsub("^%s*(.-)%s*$", "%1");
  1605. end));
  1606. end
  1607.  
  1608. -- Converts a hyphen to a dash
  1609. local function hyphen_to_dash( str )
  1610. if not is_set(str) or str:match( "[%[%]{}<>]" ) ~= nil then
  1611. return str;
  1612. end
  1613. return str:gsub( '-', '–' );
  1614. end
  1615.  
  1616. --[[--------------------------< S A F E _ J O I N >------------------------------------------------------------
  1617.  
  1618. Joins a sequence of strings together while checking for duplicate separation characters.
  1619.  
  1620. ]]
  1621.  
  1622. local function safe_join( tbl, duplicate_char )
  1623. --[[
  1624. Note: we use string functions here, rather than ustring functions.
  1625. This has considerably faster performance and should work correctly as
  1626. long as the duplicate_char is strict ASCII. The strings
  1627. in tbl may be ASCII or UTF8.
  1628. ]]
  1629. local str = ''; -- the output string
  1630. local comp = ''; -- what does 'comp' mean?
  1631. local end_chr = '';
  1632. local trim;
  1633. for _, value in ipairs( tbl ) do
  1634. if value == nil then value = ''; end
  1635. if str == '' then -- if output string is empty
  1636. str = value; -- assign value to it (first time through the loop)
  1637. elseif value ~= '' then
  1638. if value:sub(1,1) == '<' then -- Special case of values enclosed in spans and other markup.
  1639. comp = value:gsub( "%b<>", "" ); -- remove html markup (<span>string</span> -> string)
  1640. else
  1641. comp = value;
  1642. end
  1643. -- typically duplicate_char is sepc
  1644. if comp:sub(1,1) == duplicate_char then -- is first charactier same as duplicate_char? why test first character?
  1645. -- Because individual string segments often (always?) begin with terminal punct for th
  1646. -- preceding segment: 'First element' .. 'sepc next element' .. etc?
  1647. trim = false;
  1648. end_chr = str:sub(-1,-1); -- get the last character of the output string
  1649. -- str = str .. "<HERE(enchr=" .. end_chr.. ")" -- debug stuff?
  1650. if end_chr == duplicate_char then -- if same as separator
  1651. str = str:sub(1,-2); -- remove it
  1652. elseif end_chr == "'" then -- if it might be wikimarkup
  1653. if str:sub(-3,-1) == duplicate_char .. "''" then -- if last three chars of str are sepc''
  1654. str = str:sub(1, -4) .. "''"; -- remove them and add back ''
  1655. elseif str:sub(-5,-1) == duplicate_char .. "]]''" then -- if last five chars of str are sepc]]''
  1656. trim = true; -- why? why do this and next differently from previous?
  1657. elseif str:sub(-4,-1) == duplicate_char .. "]''" then -- if last four chars of str are sepc]''
  1658. trim = true; -- same question
  1659. end
  1660. elseif end_chr == "]" then -- if it might be wikimarkup
  1661. if str:sub(-3,-1) == duplicate_char .. "]]" then -- if last three chars of str are sepc]] wikilink
  1662. trim = true;
  1663. elseif str:sub(-2,-1) == duplicate_char .. "]" then -- if last two chars of str are sepc] external link
  1664. trim = true;
  1665. elseif str:sub(-4,-1) == duplicate_char .. "'']" then -- normal case when |url=something & |title=Title.
  1666. trim = true;
  1667. end
  1668. elseif end_chr == " " then -- if last char of output string is a space
  1669. if str:sub(-2,-1) == duplicate_char .. " " then -- if last two chars of str are <sepc><space>
  1670. str = str:sub(1,-3); -- remove them both
  1671. end
  1672. end
  1673.  
  1674. if trim then
  1675. if value ~= comp then -- value does not equal comp when value contains html markup
  1676. local dup2 = duplicate_char;
  1677. if dup2:match( "%A" ) then dup2 = "%" .. dup2; end -- if duplicate_char not a letter then escape it
  1678. value = value:gsub( "(%b<>)" .. dup2, "%1", 1 ) -- remove duplicate_char if it follows html markup
  1679. else
  1680. value = value:sub( 2, -1 ); -- remove duplicate_char when it is first character
  1681. end
  1682. end
  1683. end
  1684. str = str .. value; --add it to the output string
  1685. end
  1686. end
  1687. return str;
  1688. end
  1689.  
  1690. --[[--------------------------< I S _ G O O D _ V A N C _ N A M E >--------------------------------------------
  1691.  
  1692. For Vancouver Style, author/editor names are supposed to be rendered in Latin (read ASCII) characters. When a name
  1693. uses characters that contain diacritical marks, those characters are to be converted to the corresponding Latin character.
  1694. When a name is written using a non-Latin alphabet or logogram, that name is to be transliterated into Latin characters.
  1695. These things are not currently possible in this module so are left to the editor to do.
  1696.  
  1697. This test allows |first= and |last= names to contain any of the letters defined in the four Unicode Latin character sets
  1698. [http://www.unicode.org/charts/PDF/U0000.pdf C0 Controls and Basic Latin] 0041–005A, 0061–007A
  1699. [http://www.unicode.org/charts/PDF/U0080.pdf C1 Controls and Latin-1 Supplement] 00C0–00D6, 00D8–00F6, 00F8–00FF
  1700. [http://www.unicode.org/charts/PDF/U0100.pdf Latin Extended-A] 0100–017F
  1701. [http://www.unicode.org/charts/PDF/U0180.pdf Latin Extended-B] 0180–01BF, 01C4–024F
  1702.  
  1703. |lastn= also allowed to contain hyphens, spaces, and apostrophes. (http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35029/)
  1704. |firstn= also allowed to contain hyphens, spaces, apostrophes, and periods
  1705.  
  1706. At the time of this writing, I had to write the 'if nil == mw.ustring.find ...' test outside of the code editor and paste it here
  1707. because the code editor gets confused between character insertion point and cursor position.
  1708.  
  1709. ]]
  1710.  
  1711. local function is_good_vanc_name (last, first)
  1712. if nil == mw.ustring.find (last, "^[A-Za-zÀ-ÖØ-öø-ƿDŽ-ɏ%-%s%']*$") or nil == mw.ustring.find (first, "^[A-Za-zÀ-ÖØ-öø-ƿDŽ-ɏ%-%s%'%.]*$") then
  1713. add_vanc_error ();
  1714. return false; -- not a string of latin characters; Vancouver required Romanization
  1715. end;
  1716. return true;
  1717. end
  1718.  
  1719. --[[--------------------------< R E D U C E _ T O _ I N I T I A L S >------------------------------------------
  1720.  
  1721. Attempts to convert names to initials in support of |name-list-format=vanc.
  1722.  
  1723. Names in |firstn= may be separated by spaces or hyphens, or for initials, a period. See http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35062/.
  1724.  
  1725. Vancouver style requires family rank designations (Jr, II, III, etc) to be rendered as Jr, 2nd, 3rd, etc. This form is not
  1726. currently supported by this code so correctly formed names like Smith JL 2nd are converted to Smith J2. See http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35085/.
  1727.  
  1728. This function uses ustring functions because firstname initials may be any of the unicode Latin characters accepted by is_good_vanc_name ().
  1729.  
  1730. ]]
  1731.  
  1732. local function reduce_to_initials(first)
  1733. if mw.ustring.match(first, "^%u%u$") then return first end; -- when first contains just two upper-case letters, nothing to do
  1734. local initials = {}
  1735. local i = 0; -- counter for number of initials
  1736. for word in mw.ustring.gmatch(first, "[^%s%.%-]+") do -- names separated by spaces, hyphens, or periods
  1737. table.insert(initials, mw.ustring.sub(word,1,1)) -- Vancouver format does not include full stops.
  1738. i = i + 1; -- bump the counter
  1739. if 2 <= i then break; end -- only two initials allowed in Vancouver system; if 2, quit
  1740. end
  1741. return table.concat(initials) -- Vancouver format does not include spaces.
  1742. end
  1743.  
  1744. --[[--------------------------< L I S T _ P E O P L E >-------------------------------------------------------
  1745.  
  1746. Formats a list of people (e.g. authors / editors)
  1747.  
  1748. ]]
  1749.  
  1750. local function list_people(control, people, etal, list_name) -- TODO: why is list_name here? not used in this function
  1751. local sep;
  1752. local namesep;
  1753. local format = control.format
  1754. local maximum = control.maximum
  1755. local lastauthoramp = control.lastauthoramp;
  1756. local text = {}
  1757.  
  1758. if 'vanc' == format then -- Vancouver-like author/editor name styling?
  1759. sep = ','; -- name-list separator between authors is a comma
  1760. namesep = ' '; -- last/first separator is a space
  1761. else
  1762. sep = ';' -- name-list separator between authors is a semicolon
  1763. namesep = ', ' -- last/first separator is <comma><space>
  1764. end
  1765. if sep:sub(-1,-1) ~= " " then sep = sep .. " " end
  1766. if is_set (maximum) and maximum < 1 then return "", 0; end -- returned 0 is for EditorCount; not used for authors
  1767. for i,person in ipairs(people) do
  1768. if is_set(person.last) then
  1769. local mask = person.mask
  1770. local one
  1771. local sep_one = sep;
  1772. if is_set (maximum) and i > maximum then
  1773. etal = true;
  1774. break;
  1775. elseif (mask ~= nil) then
  1776. local n = tonumber(mask)
  1777. if (n ~= nil) then
  1778. one = string.rep("&mdash;",n)
  1779. else
  1780. one = mask;
  1781. sep_one = " ";
  1782. end
  1783. else
  1784. one = person.last
  1785. local first = person.first
  1786. if is_set(first) then
  1787. if ( "vanc" == format ) then -- if vancouver format
  1788. one = one:gsub ('%.', ''); -- remove periods from surnames (http://www.ncbi.nlm.nih.gov/books/NBK7271/box/A35029/)
  1789. if not person.corporate and is_good_vanc_name (one, first) then -- and name is all Latin characters; corporate authors not tested
  1790. first = reduce_to_initials(first) -- attempt to convert first name(s) to initials
  1791. end
  1792. end
  1793. one = one .. namesep .. first
  1794. end
  1795. if is_set(person.link) and person.link ~= control.page_name then
  1796. one = "[[" .. person.link .. "|" .. one .. "]]" -- link author/editor if this page is not the author's/editor's page
  1797. end
  1798. end
  1799. table.insert( text, one )
  1800. table.insert( text, sep_one )
  1801. end
  1802. end
  1803.  
  1804. local count = #text / 2; -- (number of names + number of separators) divided by 2
  1805. if count > 0 then
  1806. if count > 1 and is_set(lastauthoramp) and not etal then
  1807. text[#text-2] = " & "; -- replace last separator with ampersand text
  1808. end
  1809. text[#text] = nil; -- erase the last separator
  1810. end
  1811. local result = table.concat(text) -- construct list
  1812. if etal and is_set (result) then -- etal may be set by |display-authors=etal but we might not have a last-first list
  1813. result = result .. sep .. ' ' .. cfg.messages['et al']; -- we've go a last-first list and etal so add et al.
  1814. end
  1815. return result, count
  1816. end
  1817.  
  1818. --[[--------------------------< A N C H O R _ I D >------------------------------------------------------------
  1819.  
  1820. Generates a CITEREF anchor ID if we have at least one name or a date. Otherwise returns an empty string.
  1821.  
  1822. namelist is one of the contributor-, author-, or editor-name lists chosen in that order. year is Year or anchor_year.
  1823.  
  1824. ]]
  1825. local function anchor_id (namelist, year)
  1826. local names={}; -- a table for the one to four names and year
  1827. for i,v in ipairs (namelist) do -- loop through the list and take up to the first four last names
  1828. names[i] = v.last
  1829. if i == 4 then break end -- if four then done
  1830. end
  1831. table.insert (names, year); -- add the year at the end
  1832. local id = table.concat(names); -- concatenate names and year for CITEREF id
  1833. if is_set (id) then -- if concatenation is not an empty string
  1834. return "CITEREF" .. id; -- add the CITEREF portion
  1835. else
  1836. return ''; -- return an empty string; no reason to include CITEREF id in this citation
  1837. end
  1838. end
  1839.  
  1840.  
  1841. --[[--------------------------< N A M E _ H A S _ E T A L >----------------------------------------------------
  1842.  
  1843. Evaluates the content of author and editor name parameters for variations on the theme of et al. If found,
  1844. the et al. is removed, a flag is set to true and the function returns the modified name and the flag.
  1845.  
  1846. This function never sets the flag to false but returns its previous state because it may have been set by
  1847. previous passes through this function or by the parameters |display-authors=etal or |display-editors=etal
  1848.  
  1849. ]]
  1850.  
  1851. local function name_has_etal (name, etal, nocat)
  1852.  
  1853. if is_set (name) then -- name can be nil in which case just return
  1854. local etal_pattern = "[;,]? *[\"']*%f[%a][Ee][Tt] *[Aa][Ll][%.\"']*$" -- variations on the 'et al' theme
  1855. local others_pattern = "[;,]? *%f[%a]and [Oo]thers"; -- and alternate to et al.
  1856. if name:match (etal_pattern) then -- variants on et al.
  1857. name = name:gsub (etal_pattern, ''); -- if found, remove
  1858. etal = true; -- set flag (may have been set previously here or by |display-authors=etal)
  1859. if not nocat then -- no categorization for |vauthors=
  1860. add_maint_cat ('etal'); -- and add a category if not already added
  1861. end
  1862. elseif name:match (others_pattern) then -- if not 'et al.', then 'and others'?
  1863. name = name:gsub (others_pattern, ''); -- if found, remove
  1864. etal = true; -- set flag (may have been set previously here or by |display-authors=etal)
  1865. if not nocat then -- no categorization for |vauthors=
  1866. add_maint_cat ('etal'); -- and add a category if not already added
  1867. end
  1868. end
  1869. end
  1870. return name, etal; --
  1871. end
  1872.  
  1873. --[[--------------------------< E X T R A C T _ N A M E S >----------------------------------------------------
  1874. Gets name list from the input arguments
  1875.  
  1876. Searches through args in sequential order to find |lastn= and |firstn= parameters (or their aliases), and their matching link and mask parameters.
  1877. Stops searching when both |lastn= and |firstn= are not found in args after two sequential attempts: found |last1=, |last2=, and |last3= but doesn't
  1878. find |last4= and |last5= then the search is done.
  1879.  
  1880. This function emits an error message when there is a |firstn= without a matching |lastn=. When there are 'holes' in the list of last names, |last1= and |last3=
  1881. are present but |last2= is missing, an error message is emitted. |lastn= is not required to have a matching |firstn=.
  1882.  
  1883. When an author or editor parameter contains some form of 'et al.', the 'et al.' is stripped from the parameter and a flag (etal) returned
  1884. that will cause list_people() to add the static 'et al.' text from Module:Citation/CS1/Configuration. This keeps 'et al.' out of the
  1885. template's metadata. When this occurs, the page is added to a maintenance category.
  1886.  
  1887. ]]
  1888.  
  1889. local function extract_names(args, list_name)
  1890. local names = {}; -- table of names
  1891. local last; -- individual name components
  1892. local first;
  1893. local link;
  1894. local mask;
  1895. local i = 1; -- loop counter/indexer
  1896. local n = 1; -- output table indexer
  1897. local count = 0; -- used to count the number of times we haven't found a |last= (or alias for authors, |editor-last or alias for editors)
  1898. local etal=false; -- return value set to true when we find some form of et al. in an author parameter
  1899.  
  1900. local err_msg_list_name = list_name:match ("(%w+)List") .. 's list'; -- modify AuthorList or EditorList for use in error messages if necessary
  1901. while true do
  1902. last = select_one( args, cfg.aliases[list_name .. '-Last'], 'redundant_parameters', i ); -- search through args for name components beginning at 1
  1903. first = select_one( args, cfg.aliases[list_name .. '-First'], 'redundant_parameters', i );
  1904. link = select_one( args, cfg.aliases[list_name .. '-Link'], 'redundant_parameters', i );
  1905. mask = select_one( args, cfg.aliases[list_name .. '-Mask'], 'redundant_parameters', i );
  1906.  
  1907. last, etal = name_has_etal (last, etal, false); -- find and remove variations on et al.
  1908. first, etal = name_has_etal (first, etal, false); -- find and remove variations on et al.
  1909.  
  1910. if first and not last then -- if there is a firstn without a matching lastn
  1911. table.insert( z.message_tail, { set_error( 'first_missing_last', {err_msg_list_name, i}, true ) } ); -- add this error message
  1912. elseif not first and not last then -- if both firstn and lastn aren't found, are we done?
  1913. count = count + 1; -- number of times we haven't found last and first
  1914. if 2 <= count then -- two missing names and we give up
  1915. break; -- normal exit or there is a two-name hole in the list; can't tell which
  1916. end
  1917. else -- we have last with or without a first
  1918. if is_set (link) and false == link_param_ok (link) then -- do this test here in case link is missing last
  1919. table.insert( z.message_tail, { set_error( 'bad_paramlink', list_name:match ("(%w+)List"):lower() .. '-link' .. i )}); -- url or wikilink in author link;
  1920. end
  1921. names[n] = {last = last, first = first, link = link, mask = mask, corporate=false}; -- add this name to our names list (corporate for |vauthors= only)
  1922. n = n + 1; -- point to next location in the names table
  1923. if 1 == count then -- if the previous name was missing
  1924. table.insert( z.message_tail, { set_error( 'missing_name', {err_msg_list_name, i-1}, true ) } ); -- add this error message
  1925. end
  1926. count = 0; -- reset the counter, we're looking for two consecutive missing names
  1927. end
  1928. i = i + 1; -- point to next args location
  1929. end
  1930. return names, etal; -- all done, return our list of names
  1931. end
  1932.  
--[[--------------------------< E X T R A C T _ I D S >------------------------------------------------------------

Populates ID table from arguments using configuration settings. Loops through cfg.id_handlers and searches args for
any of the parameters listed in each cfg.id_handlers['...'].parameters. If found, adds the parameter and value to
the identifier list. Emits a redundant-parameter error message if more than one alias exists in args.
  1938.  
  1939. ]]
  1940.  
  1941. local function extract_ids( args )
  1942. local id_list = {}; -- list of identifiers found in args
  1943. for k, v in pairs( cfg.id_handlers ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
  1944. v = select_one( args, v.parameters, 'redundant_parameters' ); -- v.parameters is a table of aliases for k; here we pick one from args if present
  1945. if is_set(v) then id_list[k] = v; end -- if found in args, add identifier to our list
  1946. end
  1947. return id_list;
  1948. end
  1949.  
  1950. --[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------
  1951.  
  1952. Takes a table of IDs created by extract_ids() and turns it into a table of formatted ID outputs.
  1953.  
  1954. inputs:
  1955. id_list – table of identifiers built by extract_ids()
  1956. options – table of various template parameter values used to modify some manually handled identifiers
  1957.  
  1958. ]]
  1959.  
  1960. local function build_id_list( id_list, options )
  1961. local new_list, handler = {};
  1962.  
  1963. function fallback(k) return { __index = function(t,i) return cfg.id_handlers[k][i] end } end;
  1964. for k, v in pairs( id_list ) do -- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN'], v is a table
  1965. -- fallback to read-only cfg
  1966. handler = setmetatable( { ['id'] = v }, fallback(k) );
  1967. if handler.mode == 'external' then
  1968. table.insert( new_list, {handler.label, external_link_id( handler ) } );
  1969. elseif handler.mode == 'internal' then
  1970. table.insert( new_list, {handler.label, internal_link_id( handler ) } );
  1971. elseif handler.mode ~= 'manual' then
  1972. error( cfg.messages['unknown_ID_mode'] );
  1973. elseif k == 'DOI' then
  1974. table.insert( new_list, {handler.label, doi( v, options.DoiBroken ) } );
  1975. elseif k == 'HDL' then
  1976. table.insert( new_list, {handler.label, hdl( v ) } );
  1977. elseif k == 'ARXIV' then
  1978. table.insert( new_list, {handler.label, arxiv( v, options.Class ) } );
  1979. elseif k == 'ASIN' then
  1980. table.insert( new_list, {handler.label, amazon( v, options.ASINTLD ) } );
  1981. elseif k == 'LCCN' then
  1982. table.insert( new_list, {handler.label, lccn( v ) } );
  1983. elseif k == 'OL' or k == 'OLA' then
  1984. table.insert( new_list, {handler.label, openlibrary( v ) } );
  1985. elseif k == 'PMC' then
  1986. table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } );
  1987. elseif k == 'PMID' then
  1988. table.insert( new_list, {handler.label, pmid( v ) } );
  1989. elseif k == 'ISMN' then
  1990. table.insert( new_list, {handler.label, ismn( v ) } );
  1991. elseif k == 'ISSN' then
  1992. table.insert( new_list, {handler.label, issn( v ) } );
  1993. elseif k == 'EISSN' then
  1994. table.insert( new_list, {handler.label, issn( v, true ) } ); -- true distinguishes eissn from issn
  1995. elseif k == 'ISBN' then
  1996. local ISBN = internal_link_id( handler );
  1997. if not check_isbn( v ) and not is_set(options.IgnoreISBN) then
  1998. ISBN = ISBN .. set_error( 'bad_isbn', {}, false, " ", "" );
  1999. end
  2000. table.insert( new_list, {handler.label, ISBN } );
  2001. elseif k == 'USENETID' then
  2002. table.insert( new_list, {handler.label, message_id( v ) } );
  2003. else
  2004. error( cfg.messages['unknown_manual_ID'] );
  2005. end
  2006. end
  2007. function comp( a, b ) -- used in following table.sort()
  2008. return a[1] < b[1];
  2009. end
  2010. table.sort( new_list, comp );
  2011. for k, v in ipairs( new_list ) do
  2012. new_list[k] = v[2];
  2013. end
  2014. return new_list;
  2015. end
  2016.  
  2017. --[[--------------------------< C O I N S _ C L E A N U P >----------------------------------------------------
  2018.  
  2019. Cleanup parameter values for the metadata by removing or replacing invisible characters and certain html entities.
  2020.  
  2021. 2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaced math stripmarkers with the appropriate content
  2022. when it shouldn't. See https://phabricator.wikimedia.org/T121085 and Wikipedia_talk:Lua#stripmarkers_and_mw.text.unstripNoWiki.28.29
  2023.  
  2024. TODO: move the replacement patterns and replacement values into a table in /Configuration similar to the invisible
  2025. characters table?
  2026. ]]
  2027.  
  2028. local function coins_cleanup (value)
  2029. value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content
  2030. value = value:gsub ('<span class="nowrap" style="padding%-left:0%.1em;">&#39;s</span>', "'s"); -- replace {{'s}} template with simple apostrophe-s
  2031. value = value:gsub ('&zwj;\226\128\138\039\226\128\139', "'"); -- replace {{'}} with simple apostrophe
  2032. value = value:gsub ('\226\128\138\039\226\128\139', "'"); -- replace {{'}} with simple apostrophe (as of 2015-12-11)
  2033. value = value:gsub ('&nbsp;', ' '); -- replace &nbsp; entity with plain space
  2034. value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space
  2035. value = value:gsub ('&zwj;', ''); -- remove &zwj; entities
  2036. value = value:gsub ('[\226\128\141\226\128\139]', '') -- remove zero-width joiner, zero-width space
  2037. value = value:gsub ('[\194\173\009\010\013]', ' '); -- replace soft hyphen, horizontal tab, line feed, carriage return with plain space
  2038. return value;
  2039. end
  2040.  
  2041.  
  2042. --[[--------------------------< C O I N S >--------------------------------------------------------------------
  2043.  
  2044. COinS metadata (see <http://ocoins.info/>) allows automated tools to parse the citation information.
  2045.  
  2046. ]]
  2047.  
  2048. local function COinS(data, class)
  2049. if 'table' ~= type(data) or nil == next(data) then
  2050. return '';
  2051. end
  2052.  
  2053. for k, v in pairs (data) do -- spin through all of the metadata parameter values
  2054. if 'ID_list' ~= k and 'Authors' ~= k then -- except the ID_list and Author tables (author nowiki stripmarker done when Author table processed)
  2055. data[k] = coins_cleanup (v);
  2056. end
  2057. end
  2058.  
  2059. local ctx_ver = "Z39.88-2004";
  2060. -- treat table strictly as an array with only set values.
  2061. local OCinSoutput = setmetatable( {}, {
  2062. __newindex = function(self, key, value)
  2063. if is_set(value) then
  2064. rawset( self, #self+1, table.concat{ key, '=', mw.uri.encode( remove_wiki_link( value ) ) } );
  2065. end
  2066. end
  2067. });
  2068. if in_array (class, {'arxiv', 'journal', 'news'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or
  2069. ('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then
  2070. OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier
  2071. if 'arxiv' == class then -- set genre according to the type of citation template we are rendering
  2072. OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv
  2073. elseif 'conference' == class then
  2074. OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set)
  2075. elseif 'web' == class then
  2076. OCinSoutput["rft.genre"] = "unknown"; -- cite web (when Periodical set)
  2077. else
  2078. OCinSoutput["rft.genre"] = "article"; -- journal and other 'periodical' articles
  2079. end
  2080. OCinSoutput["rft.jtitle"] = data.Periodical; -- journal only
  2081. if is_set (data.Map) then
  2082. OCinSoutput["rft.atitle"] = data.Map; -- for a map in a periodical
  2083. else
  2084. OCinSoutput["rft.atitle"] = data.Title; -- all other 'periodical' article titles
  2085. end
  2086. -- these used onlu for periodicals
  2087. OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall
  2088. OCinSoutput["rft.chron"] = data.Chron; -- free-form date components
  2089. OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books
  2090. OCinSoutput["rft.issue"] = data.Issue;
  2091. OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata
  2092.  
  2093. elseif 'thesis' ~= class then -- all others except cite thesis are treated as 'book' metadata; genre distinguishes
  2094. OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:book"; -- book metadata identifier
  2095. if 'report' == class or 'techreport' == class then -- cite report and cite techreport
  2096. OCinSoutput["rft.genre"] = "report";
  2097. elseif 'conference' == class then -- cite conference when Periodical not set
  2098. OCinSoutput["rft.genre"] = "conference";
  2099. elseif in_array (class, {'book', 'citation', 'encyclopaedia', 'interview', 'map'}) then
  2100. if is_set (data.Chapter) then
  2101. OCinSoutput["rft.genre"] = "bookitem";
  2102. OCinSoutput["rft.atitle"] = data.Chapter; -- book chapter, encyclopedia article, interview in a book, or map title
  2103. else
  2104. if 'map' == class or 'interview' == class then
  2105. OCinSoutput["rft.genre"] = 'unknown'; -- standalone map or interview
  2106. else
  2107. OCinSoutput["rft.genre"] = 'book'; -- book and encyclopedia
  2108. end
  2109. end
  2110. else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'}
  2111. OCinSoutput["rft.genre"] = "unknown";
  2112. end
  2113. OCinSoutput["rft.btitle"] = data.Title; -- book only
  2114. OCinSoutput["rft.place"] = data.PublicationPlace; -- book only
  2115. OCinSoutput["rft.series"] = data.Series; -- book only
  2116. OCinSoutput["rft.pages"] = data.Pages; -- book, journal
  2117. OCinSoutput["rft.edition"] = data.Edition; -- book only
  2118. OCinSoutput["rft.pub"] = data.PublisherName; -- book and dissertation
  2119. else -- cite thesis
  2120. OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:dissertation"; -- dissertation metadata identifier
  2121. OCinSoutput["rft.title"] = data.Title; -- dissertation (also patent but that is not yet supported)
  2122. OCinSoutput["rft.degree"] = data.Degree; -- dissertation only
  2123. OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation
  2124. end
  2125. -- and now common parameters (as much as possible)
  2126. OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation
  2127. for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
  2128. if k == 'ISBN' then v = clean_isbn( v ) end
  2129. local id = cfg.id_handlers[k].COinS;
  2130. if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry
  2131. OCinSoutput["rft_id"] = table.concat{ id, "/", v };
  2132. elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords
  2133. OCinSoutput[ id ] = v;
  2134. elseif id then -- when cfg.id_handlers[k].COinS is not nil
  2135. OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a url
  2136. end
  2137. end
  2138.  
  2139. --[[
  2140. for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all?
  2141. local id, value = cfg.id_handlers[k].COinS;
  2142. if k == 'ISBN' then value = clean_isbn( v ); else value = v; end
  2143. if string.sub( id or "", 1, 4 ) == 'info' then
  2144. OCinSoutput["rft_id"] = table.concat{ id, "/", v };
  2145. else
  2146. OCinSoutput[ id ] = value;
  2147. end
  2148. end
  2149. ]]
  2150. local last, first;
  2151. for k, v in ipairs( data.Authors ) do
  2152. last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markers, non-printing or invisible characers
  2153. if k == 1 then -- for the first author name only
  2154. if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name
  2155. OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation
  2156. OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation
  2157. elseif is_set(last) then
  2158. OCinSoutput["rft.au"] = last; -- book, journal, dissertation -- otherwise use this form for the first name
  2159. end
  2160. else -- for all other authors
  2161. if is_set(last) and is_set(first) then
  2162. OCinSoutput["rft.au"] = table.concat{ last, ", ", first }; -- book, journal, dissertation
  2163. elseif is_set(last) then
  2164. OCinSoutput["rft.au"] = last; -- book, journal, dissertation
  2165. end
  2166. end
  2167. end
  2168.  
  2169. OCinSoutput.rft_id = data.URL;
  2170. OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage };
  2171. OCinSoutput = setmetatable( OCinSoutput, nil );
  2172. -- sort with version string always first, and combine.
  2173. table.sort( OCinSoutput );
  2174. table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004"
  2175. return table.concat(OCinSoutput, "&");
  2176. end
  2177.  
  2178.  
  2179. --[[--------------------------< G E T _ I S O 6 3 9 _ C O D E >------------------------------------------------
  2180.  
Validates language names provided in |language= parameter if not an ISO639-1 code. Handles the special case that is Norwegian where
ISO639-1 code 'no' is mapped to language name 'Norwegian Bokmål' by Extension:CLDR.
  2183.  
  2184. Returns the language name and associated ISO639-1 code. Because case of the source may be incorrect or different from the case that Wikimedia
  2185. uses, the name comparisons are done in lower case and when a match is found, the Wikimedia version (assumed to be correct) is returned along
  2186. with the code. When there is no match, we return the original language name string.
  2187.  
  2188. mw.language.fetchLanguageNames() will return a list of languages that aren't part of ISO639-1. Names that aren't ISO639-1 but that are included
  2189. in the list will be found if that name is provided in the |language= parameter. For example, if |language=Samaritan Aramaic, that name will be
  2190. found with the associated code 'sam', not an ISO639-1 code. When names are found and the associated code is not two characters, this function
  2191. returns only the Wikimedia language name.
  2192.  
  2193. Adapted from code taken from Module:Check ISO 639-1.
  2194.  
  2195. ]]
  2196.  
  2197. local function get_iso639_code (lang)
  2198. if 'norwegian' == lang:lower() then -- special case related to Wikimedia remap of code 'no' at Extension:CLDR
  2199. return 'Norwegian', 'no'; -- Make sure rendered version is properly capitalized
  2200. end
  2201. local languages = mw.language.fetchLanguageNames(mw.getContentLanguage():getCode(), 'all') -- get a list of language names known to Wikimedia
  2202. -- ('all' is required for North Ndebele, South Ndebele, and Ojibwa)
  2203. local langlc = mw.ustring.lower(lang); -- lower case version for comparisons
  2204. for code, name in pairs(languages) do -- scan the list to see if we can find our language
  2205. if langlc == mw.ustring.lower(name) then
  2206. if 2 ~= code:len() then -- ISO639-1 codes only
  2207. return name; -- so return the name but not the code
  2208. end
  2209. return name, code; -- found it, return name to ensure proper capitalization and the ISO639-1 code
  2210. end
  2211. end
  2212. return lang; -- not valid language; return language in original case and nil for ISO639-1 code
  2213. end
  2214.  
  2215. --[[--------------------------< L A N G U A G E _ P A R A M E T E R >------------------------------------------
  2216.  
  2217. Get language name from ISO639-1 code value provided. If a code is valid use the returned name; if not, then use the value that was provided with the language parameter.
  2218.  
There is an exception. There are three ISO639-1 codes for Norwegian language variants. There are two official variants: Norwegian Bokmål (code 'nb') and
Norwegian Nynorsk (code 'nn'). The third, code 'no', is defined by ISO639-1 as 'Norwegian' though in Norway this is pretty much meaningless. However, it appears
that on enwiki, editors are for the most part unaware of the nb and nn variants (compare page counts for these variants at Category:Articles with non-English-language external links).

Because Norwegian Bokmål is the most common language variant, MediaWiki has been modified to return Norwegian Bokmål for ISO639-1 code 'no'. Here we undo that and
return 'Norwegian' when editors use |language=no. We presume that editors don't know about the variants or can't discriminate between them.
  2225.  
  2226. See Help talk:Citation Style_1#An ISO 639-1 language name test
  2227.  
  2228. When |language= contains a valid ISO639-1 code, the page is assigned to the category for that code: Category:Norwegian-language sources (no) if
  2229. the page is a mainspace page and the ISO639-1 code is not 'en'. Similarly, if the parameter is |language=Norwegian, it will be categorized in the same way.
  2230.  
  2231. This function supports multiple languages in the form |language=nb, French, th where the language names or codes are separated from each other by commas.
  2232.  
  2233. ]]
  2234.  
  2235. local function language_parameter (lang)
  2236. local code; -- the ISO639-1 two character code
  2237. local name; -- the language name
  2238. local language_list = {}; -- table of language names to be rendered
  2239. local names_table = {}; -- table made from the value assigned to |language=
  2240.  
  2241. names_table = mw.text.split (lang, '%s*,%s*'); -- names should be a comma separated list
  2242.  
  2243. for _, lang in ipairs (names_table) do -- reuse lang
  2244.  
  2245. if lang:match ('^%a%a%-') or 2 == lang:len() then -- ISO639-1 language code are 2 characters (fetchLanguageName also supports 3 character codes)
  2246. if lang:match ('^zh-') then
  2247. name = mw.language.fetchLanguageName( lang:lower(), lang:lower() );
  2248. else
  2249. name = mw.language.fetchLanguageName( lang:lower(), mw.getContentLanguage():getCode() ); -- get ISO 639-1 language name if Language is a proper code
  2250. end
  2251. end
  2252. if is_set (name) then -- if Language specified a valid ISO639-1 code
  2253. code = lang:lower(); -- save it
  2254. else
  2255. name, code = get_iso639_code (lang); -- attempt to get code from name (assign name here so that we are sure of proper capitalization)
  2256. end
  2257. if is_set (code) then
  2258. if 'no' == code then name = '挪威语' end; -- override wikimedia when code is 'no'
  2259. if 'zh' ~= code and not code:match ('^zh-') then -- English not the language
  2260. add_prop_cat ('foreign_lang_source', {name, code})
  2261. end
  2262. else
  2263. add_maint_cat ('unknown_lang'); -- add maint category if not already added
  2264. end
  2265. table.insert (language_list, name);
  2266. name = ''; -- so we can reuse it
  2267. end
  2268. code = #language_list -- reuse code as number of languages in the list
  2269. if 2 >= code then
  2270. name = table.concat (language_list, '及') -- insert '及' between two language names
  2271. elseif 2 < code then
  2272. language_list[code] = '及' .. language_list[code]; -- prepend last name with '及'
  2273. name = table.concat (language_list, '、'); -- and concatenate with '<comma><space>' separators
  2274. name = name:gsub ('、及', '及', 1);
  2275. end
  2276. return (" " .. wrap_msg ('language', name)); -- otherwise wrap with '(in ...)'
  2277. end
  2278.  
  2279. --[[--------------------------< S E T _ C S 1 _ S T Y L E >----------------------------------------------------
  2280.  
  2281. Set style settings for CS1 citation templates. Returns separator and postscript settings
  2282.  
  2283. ]]
  2284.  
  2285. local function set_cs1_style (ps)
  2286. if not is_set (ps) then -- unless explicitely set to something
  2287. ps = '.'; -- terminate the rendered citation with a period
  2288. end
  2289. return '.', ps; -- separator is a full stop
  2290. end
  2291.  
  2292. --[[--------------------------< S E T _ C S 2 _ S T Y L E >----------------------------------------------------
  2293.  
  2294. Set style settings for CS2 citation templates. Returns separator, postscript, ref settings
  2295.  
  2296. ]]
  2297.  
  2298. local function set_cs2_style (ps, ref)
  2299. if not is_set (ps) then -- if |postscript= has not been set, set cs2 default
  2300. ps = ''; -- make sure it isn't nil
  2301. end
  2302. if not is_set (ref) then -- if |ref= is not set
  2303. ref = "harv"; -- set default |ref=harv
  2304. end
  2305. return ',', ps, ref; -- separator is a comma
  2306. end
  2307.  
  2308. --[[--------------------------< G E T _ S E T T I N G S _ F R O M _ C I T E _ C L A S S >----------------------
  2309.  
  2310. When |mode= is not set or when its value is invalid, use config.CitationClass and parameter values to establish
  2311. rendered style.
  2312.  
  2313. ]]
  2314.  
  2315. local function get_settings_from_cite_class (ps, ref, cite_class)
  2316. local sep;
  2317. if (cite_class == "citation") then -- for citation templates (CS2)
  2318. sep, ps, ref = set_cs2_style (ps, ref);
  2319. else -- not a citation template so CS1
  2320. sep, ps = set_cs1_style (ps);
  2321. end
  2322.  
  2323. return sep, ps, ref -- return them all
  2324. end
  2325.  
  2326. --[[--------------------------< S E T _ S T Y L E >------------------------------------------------------------
  2327.  
  2328. Establish basic style settings to be used when rendering the citation. Uses |mode= if set and valid or uses
  2329. config.CitationClass from the template's #invoke: to establish style.
  2330.  
  2331. ]]
  2332.  
  2333. local function set_style (mode, ps, ref, cite_class)
  2334. local sep;
  2335. if 'cs2' == mode then -- if this template is to be rendered in CS2 (citation) style
  2336. sep, ps, ref = set_cs2_style (ps, ref);
  2337. elseif 'cs1' == mode then -- if this template is to be rendered in CS1 (cite xxx) style
  2338. sep, ps = set_cs1_style (ps);
  2339. else -- anything but cs1 or cs2
  2340. sep, ps, ref = get_settings_from_cite_class (ps, ref, cite_class); -- get settings based on the template's CitationClass
  2341. end
  2342. if 'none' == ps:lower() then -- if assigned value is 'none' then
  2343. ps = ''; -- set to empty string
  2344. end
  2345. return sep, ps, ref
  2346. end
  2347.  
  2348. --[=[-------------------------< I S _ P D F >------------------------------------------------------------------
  2349.  
  2350. Determines if a url has the file extension that is one of the pdf file extensions used by [[MediaWiki:Common.css]] when
  2351. applying the pdf icon to external links.
  2352.  
  2353. returns true if file extension is one of the recognized extension, else false
  2354.  
  2355. ]=]
  2356.  
  2357. local function is_pdf (url)
  2358. return url:match ('%.pdf[%?#]?') or url:match ('%.PDF[%?#]?');
  2359. end
  2360.  
  2361. --[[--------------------------< S T Y L E _ F O R M A T >------------------------------------------------------
  2362.  
  2363. Applies css style to |format=, |chapter-format=, etc. Also emits an error message if the format parameter does
  2364. not have a matching url parameter. If the format parameter is not set and the url contains a file extension that
  2365. is recognized as a pdf document by MediaWiki's commons.css, this code will set the format parameter to (PDF) with
  2366. the appropriate styling.
  2367.  
  2368. ]]
  2369.  
  2370. local function style_format (format, url, fmt_param, url_param)
  2371. if is_set (format) then
  2372. format = wrap_style ('format', format); -- add leading space, parenthases, resize
  2373. if not is_set (url) then
  2374. format = format .. set_error( 'format_missing_url', {fmt_param, url_param} ); -- add an error message
  2375. end
  2376. elseif is_pdf (url) then -- format is not set so if url is a pdf file then
  2377. format = wrap_style ('format', 'PDF'); -- set format to pdf
  2378. else
  2379. format = ''; -- empty string for concatenation
  2380. end
  2381. return format;
  2382. end
  2383.  
  2384. --[[--------------------------< G E T _ D I S P L A Y _ A U T H O R S _ E D I T O R S >------------------------
  2385.  
  2386. Returns a number that may or may not limit the length of the author or editor name lists.
  2387.  
  2388. When the value assigned to |display-authors= is a number greater than or equal to zero, return the number and
  2389. the previous state of the 'etal' flag (false by default but may have been set to true if the name list contains
  2390. some variant of the text 'et al.').
  2391.  
  2392. When the value assigned to |display-authors= is the keyword 'etal', return a number that is one greater than the
  2393. number of authors in the list and set the 'etal' flag true. This will cause the list_people() to display all of
  2394. the names in the name list followed by 'et al.'
  2395.  
  2396. In all other cases, returns nil and the previous state of the 'etal' flag.
  2397.  
  2398. ]]
  2399.  
  2400. local function get_display_authors_editors (max, count, list_name, etal)
  2401. if is_set (max) then
  2402. if 'etal' == max:lower():gsub("[ '%.]", '') then -- the :gsub() portion makes 'etal' from a variety of 'et al.' spellings and stylings
  2403. max = count + 1; -- number of authors + 1 so display all author name plus et al.
  2404. etal = true; -- overrides value set by extract_names()
  2405. elseif max:match ('^%d+$') then -- if is a string of numbers
  2406. max = tonumber (max); -- make it a number
  2407. if max >= count and 'authors' == list_name then -- AUTHORS ONLY -- if |display-xxxxors= value greater than or equal to number of authors/editors
  2408. add_maint_cat ('disp_auth_ed', list_name);
  2409. end
  2410. else -- not a valid keyword or number
  2411. table.insert( z.message_tail, { set_error( 'invalid_param_val', {'display-' .. list_name, max}, true ) } ); -- add error message
  2412. max = nil; -- unset
  2413. end
  2414. elseif 'authors' == list_name then -- AUTHORS ONLY need to clear implicit et al category
  2415. max = count + 1; -- number of authors + 1
  2416. end
  2417. return max, etal;
  2418. end
  2419.  
  2420. --[[--------------------------< E X T R A _ T E X T _ I N _ P A G E _ C H E C K >------------------------------
  2421.  
  2422. Adds page to Category:CS1 maint: extra text if |page= or |pages= has what appears to be some form of p. or pp.
  2423. abbreviation in the first characters of the parameter content.
  2424.  
  2425. check Page and Pages for extraneous p, p., pp, and pp. at start of parameter value:
good pattern: '^P[^%.P%l]' matches when |page(s)= begins PX or P# but not Px where x and X are letters and # is a digit
bad pattern: '^[Pp][Pp]' matches when |page(s)= begins pp or pP or Pp or PP
  2428.  
  2429. ]]
  2430.  
  2431. local function extra_text_in_page_check (page)
  2432. -- local good_pattern = '^P[^%.P%l]';
  2433. local good_pattern = '^P[^%.Pp]'; -- ok to begin with uppercase P: P7 (pg 7 of section P) but not p123 (page 123) TODO: add Gg for PG or Pg?
  2434. -- local bad_pattern = '^[Pp][Pp]';
  2435. local bad_pattern = '^[Pp]?[Pp]%.?[ %d]';
  2436.  
  2437. if not page:match (good_pattern) and (page:match (bad_pattern) or page:match ('^[Pp]ages?')) then
  2438. add_maint_cat ('extra_text');
  2439. end
  2440. -- if Page:match ('^[Pp]?[Pp]%.?[ %d]') or Page:match ('^[Pp]ages?[ %d]') or
  2441. -- Pages:match ('^[Pp]?[Pp]%.?[ %d]') or Pages:match ('^[Pp]ages?[ %d]') then
  2442. -- add_maint_cat ('extra_text');
  2443. -- end
  2444. end
  2445.  
  2446.  
  2447. --[[--------------------------< P A R S E _ V A U T H O R S _ V E D I T O R S >--------------------------------
  2448.  
  2449. This function extracts author / editor names from |vauthors= or |veditors= and finds matching |xxxxor-maskn= and
  2450. |xxxxor-linkn= in args. It then returns a table of assembled names just as extract_names() does.
  2451.  
Author / editor names in |vauthors= or |veditors= must be in Vancouver system style. Corporate or institutional names
may sometimes be required and because such names will often fail the is_good_vanc_name() and other format compliance
tests, are wrapped in doubled parentheses ((corporate name)) to suppress the format tests.

This function sets the vancouver error when a required comma is missing and when there is a space between an author's initials.
  2457.  
  2458. ]]
  2459.  
  -- Splits the comma-separated, Vancouver-style name list held in vparam into a sequence of name tables.
  --
  -- Parameters:
  --   args      - template argument table; searched (via select_one) for the enumerated -Link / -Mask aliases
  --   vparam    - content of |vauthors= or |veditors=
  --   list_name - name-list prefix used to build the cfg.aliases keys (list_name .. '-Link', list_name .. '-Mask')
  --
  -- Returns:
  --   names - sequence of tables {last, first, link, mask, corporate}, one per comma-separated name
  --   etal  - the second value returned by name_has_etal() for vparam
  --
  -- add_vanc_error() is called when the list contains wikilink markup, when a name looks like it is missing
  -- a comma or has a space between initials, and when the initials are not one or two upper-case letters.
  local function parse_vauthors_veditors (args, vparam, list_name)
    local names = {};                                      -- names assembled from vparam plus the enumerated link and mask parameters
    local etal = false;                                    -- replaced by the flag that name_has_etal() hands back
    local last, first, link, mask;
    local corporate;                                       -- per-name flag; reset at the top of every loop iteration

    vparam, etal = name_has_etal (vparam, etal, true);     -- find and remove variations on et al.; do not categorize (done here because et al. might have a period)
    if vparam:find ('%[%[') or vparam:find ('%]%]') then   -- no wikilinking of vauthors / veditors names
      add_vanc_error ();
    end

    local v_name_table = mw.text.split (vparam, "%s*,%s*"); -- names are separated by commas

    for i, v_name in ipairs (v_name_table) do
      corporate = false;                                   -- a corporate name earlier in the list must not mark the names that follow it
      if v_name:match ('^%(%(.+%)%)$') then                -- ((corporate name)): doubled parentheses suppress vanc formatting and error detection
        first = '';                                        -- empty string for concatenation; may still hold the previous name's initials
        last = v_name:match ('^%(%((.+)%)%)$');            -- keep only the text inside the doubled parentheses
        corporate = true;
      elseif string.find (v_name, "%s") then
        local lastfirstTable = mw.text.split (v_name, "%s"); -- local: the original assignment leaked a global
        first = table.remove (lastfirstTable);             -- final element, which should be the initials
        last = table.concat (lastfirstTable, " ");         -- everything that is not the initials
        if mw.ustring.match (last, '%a+%s+%u+%s+%a+') or mw.ustring.match (v_name, ' %u %u$') then
          add_vanc_error ();                               -- 'last II last' (missing comma) or a space between two initials
        end
      else
        first = '';                                        -- empty string for concatenation; may still hold the previous name's initials
        last = v_name;                                     -- single word: a last name without initials or a one-word corporate name
      end

      if is_set (first) and not mw.ustring.match (first, "^%u?%u$") then -- initials must be one or two upper-case letters, nothing else
        add_vanc_error ();
      end

      -- same enumerated-parameter lookup that extract_names() performs
      link = select_one (args, cfg.aliases[list_name .. '-Link'], 'redundant_parameters', i);
      mask = select_one (args, cfg.aliases[list_name .. '-Mask'], 'redundant_parameters', i);
      names[i] = {last = last, first = first, link = link, mask = mask, corporate = corporate};
    end

    return names, etal;
  end
  2500.  
  --[[--------------------------< S E L E C T _ A U T H O R _ E D I T O R _ S O U R C E >------------------------

  Chooses which one of the three possible name-list sources is used for the author (or editor) list:
    1 - enumerated names: |authorn= / |lastn= / |firstn= (or the |editorn= equivalents)
    2 - the Vancouver-style list: |vauthors= (or |veditors=)
    3 - the free-form list: |authors= (or |editors=)

  Enumerated names have the highest priority and the free-form list the lowest. Enumerated names are detected
  by looking for the first and then the second '-Last' alias only; the second lookup covers a list that
  has a |first1= without a |last1=.

  When more than one source is present a 'redundant_parameters' error message is appended to z.message_tail.

  In this function, vxxxxors = vauthors or veditors; xxxxors = authors or editors as appropriate.

  Returns the number of the selected source; returns 1 when no source is present so that the
  missing-name test can still run.

  ]]

  local function select_author_editor_source (vxxxxors, xxxxors, args, list_name)
    local last_aliases = cfg.aliases[list_name .. '-Last'];
    local has_enumerated = false;

    if select_one (args, last_aliases, 'none', 1) or select_one (args, last_aliases, 'none', 2) then
      has_enumerated = true;                               -- found |xxxxor1= or |xxxxor2= (or one of their aliases)
    end

    local has_vanc = is_set (vxxxxors);
    local has_freeform = is_set (xxxxors);

    -- any two sources present at the same time is an error
    local source_count = (has_enumerated and 1 or 0) + (has_vanc and 1 or 0) + (has_freeform and 1 or 0);
    if source_count > 1 then
      local err_name = ('AuthorList' == list_name) and 'author' or 'editor'; -- name used in the error message
      table.insert (z.message_tail, { set_error ('redundant_parameters', {err_name .. '-name-list parameters'}, true) });
    end

    if has_enumerated then
      return 1;
    elseif has_vanc then
      return 2;
    elseif has_freeform then
      return 3;
    end
    return 1;                                              -- nothing found; 1 lets the missing-author-name test run
  end
  2544.  
  2545.  
  --[[--------------------------< I S _ V A L I D _ P A R A M E T E R _ V A L U E >------------------------------

  Validates the value assigned to a parameter that accepts only a limited set of keywords (yes, y, true, no, etc).

    value    - the value assigned to the parameter
    name     - the parameter name, used in the error message
    possible - list of accepted keywords; value is lower-cased before it is compared against this list

  Returns true when value is not set (missing or empty in the source template) or when it is one of the
  accepted keywords. Otherwise appends an 'invalid_param_val' error message to z.message_tail and returns false.

  ]]

  local function is_valid_parameter_value (value, name, possible)
    if not is_set (value) or in_array (value:lower(), possible) then
      return true;                                         -- empty parameter, or an accepted keyword
    end

    table.insert (z.message_tail, { set_error ('invalid_param_val', {name, value}, true) }); -- not an allowed value
    return false;
  end
  2565.  
  2566.  
  --[[--------------------------< T E R M I N A T E _ N A M E _ L I S T >----------------------------------------

  Terminates a name list (author, contributor, editor) so that it always ends with a single space.

  When the list already ends with the separator character (sepc), or with sepc followed by the two closing
  square brackets of a wikilink, only the space is appended. In every other case sepc and a space are appended.

  ]]

  local function terminate_name_list (name_list, sepc)
    local ends_with_sepc = string.sub (name_list, -1) == sepc;
    local ends_with_linked_sepc = string.sub (name_list, -3) == sepc .. ']]';

    if ends_with_sepc or ends_with_linked_sepc then
      return name_list .. ' ';                             -- separator already present; don't add another
    end
    return name_list .. sepc .. ' ';                       -- otherwise terminate the list with the separator
  end
  2583.  
  2584.  
  --[[-------------------------< F O R M A T _ V O L U M E _ I S S U E >----------------------------------------

  Returns the formatted volume and issue as a single string: both concatenated, either one alone, or an
  empty string when neither is set.

  Magazine-style output (cite_class is 'magazine', or cite_class is 'citation' or 'map' with origin 'magazine')
  uses the 'vol-no', 'vol' and 'issue' messages through wrap_msg().

  Every other class uses journal-style output: a volume longer than six characters is rendered with the
  'j-vol' message, a shorter one is passed through hyphen_to_dash() and wrapped in the 'vol-bold' style; the
  issue, when set, is rendered with the 'j-issue' message and appended.

  ]]
  local function format_volume_issue (volume, issue, cite_class, origin, sepc, lower)
    local has_volume = is_set (volume);
    local has_issue = is_set (issue);

    if not (has_volume or has_issue) then
      return '';                                           -- nothing to format
    end

    local magazine_style = 'magazine' == cite_class
      or (in_array (cite_class, {'citation', 'map'}) and 'magazine' == origin);

    if magazine_style then
      if has_volume and has_issue then
        return wrap_msg ('vol-no', {sepc, volume, issue}, lower);
      end
      if has_volume then
        return wrap_msg ('vol', {sepc, volume}, lower);
      end
      return wrap_msg ('issue', {sepc, issue}, lower);
    end

    local rendered = '';
    if has_volume then
      if mw.ustring.len (volume) > 6 then                  -- long volume values use the 'j-vol' message instead of bold
        rendered = substitute (cfg.messages['j-vol'], {sepc, volume});
      else
        rendered = wrap_style ('vol-bold', hyphen_to_dash (volume));
      end
    end
    if has_issue then
      rendered = rendered .. substitute (cfg.messages['j-issue'], issue);
    end
    return rendered;
  end
  2617.  
  2618.  
  --[[-------------------------< F O R M A T _ P A G E S _ S H E E T S >-----------------------------------------

  Adds static text to one of the |page(s)= or |sheet(s)= values and returns it; the other three return values
  are empty strings. The return order is:
    page, pages, sheet, sheets

  Sheets are considered only when cite_class is 'map', and are then tested before pages. Within each pair the
  singular form has priority over the plural form.

  Journal-style output (cite_class is 'journal', or cite_class is 'citation' or 'map' with origin 'journal')
  renders both |page= and |pages= with the 'j-page(s)' message and returns the result in the page position.
  Otherwise nopp selects the 'nopp' message; without nopp, |page= and an all-numeric |pages= use 'p-prefix'
  and any other |pages= uses 'pp-prefix'.

  ]]

  local function format_pages_sheets (page, pages, sheet, sheets, cite_class, origin, sepc, nopp, lower)
    local journal_origin = 'journal' == origin;

    if 'map' == cite_class then                            -- only cite map supports sheet(s) as in-source locators
      if is_set (sheet) then
        if journal_origin then
          return '', '', wrap_msg ('j-sheet', sheet, lower), '';
        end
        return '', '', wrap_msg ('sheet', {sepc, sheet}, lower), '';
      end
      if is_set (sheets) then
        if journal_origin then
          return '', '', '', wrap_msg ('j-sheets', sheets, lower);
        end
        return '', '', '', wrap_msg ('sheets', {sepc, sheets}, lower);
      end
    end

    local is_journal = 'journal' == cite_class or (in_array (cite_class, {'citation', 'map'}) and journal_origin);

    if is_set (page) then
      if is_journal then
        return substitute (cfg.messages['j-page(s)'], page), '', '', '';
      end
      local key = nopp and 'nopp' or 'p-prefix';
      return substitute (cfg.messages[key], {sepc, page}), '', '', '';
    end

    if is_set (pages) then
      if is_journal then
        return substitute (cfg.messages['j-page(s)'], pages), '', '', '';
      end
      local key;
      if nopp then
        key = 'nopp';
      elseif tonumber (pages) ~= nil then                  -- pages is a plain number, so assume a single page
        key = 'p-prefix';
      else
        key = 'pp-prefix';
      end
      return '', substitute (cfg.messages[key], {sepc, pages}), '', '';
    end

    return '', '', '', '';                                 -- no locator set
  end
  2669.  
  2670. --[[--------------------------< C I T A T I O N 0 >------------------------------------------------------------
  2671.  
  2672. This is the main function doing the majority of the citation formatting.
  2673.  
  2674. ]]
  2675.  
  2676. local function citation0( config, args)
  2677. --[[
  2678. Load Input Parameters
  2679. The argument_wrapper facilitates the mapping of multiple aliases to single internal variable.
  2680. ]]
  2681. local A = argument_wrapper( args );
  2682. local i
  2683.  
  2684. -- Pick out the relevant fields from the arguments. Different citation templates
  2685. -- define different field names for the same underlying things.
  2686. local author_etal;
  2687. local a = {}; -- authors list from |lastn= / |firstn= pairs or |vauthors=
  2688. local Authors;
  2689. local NameListFormat = A['NameListFormat'];
  2690.  
  2691. do -- to limit scope of selected
  2692. local selected = select_author_editor_source (A['Vauthors'], A['Authors'], args, 'AuthorList');
  2693. if 1 == selected then
  2694. a, author_etal = extract_names (args, 'AuthorList'); -- fetch author list from |authorn= / |lastn= / |firstn=, |author-linkn=, and |author-maskn=
  2695. elseif 2 == selected then
  2696. NameListFormat = 'vanc'; -- override whatever |name-list-format= might be
  2697. a, author_etal = parse_vauthors_veditors (args, args.vauthors, 'AuthorList'); -- fetch author list from |vauthors=, |author-linkn=, and |author-maskn=
  2698. elseif 3 == selected then
  2699. Authors = A['Authors']; -- use content of |authors=
  2700. end
  2701. end
  2702.  
  2703. local Coauthors = A['Coauthors'];
  2704. local Others = A['Others'];
  2705.  
  2706. local editor_etal;
  2707. local e = {}; -- editors list from |editor-lastn= / |editor-firstn= pairs or |veditors=
  2708. local Editors;
  2709.  
  2710. do -- to limit scope of selected
  2711. local selected = select_author_editor_source (A['Veditors'], A['Editors'], args, 'EditorList');
  2712. if 1 == selected then
  2713. e, editor_etal = extract_names (args, 'EditorList'); -- fetch editor list from |editorn= / |editor-lastn= / |editor-firstn=, |editor-linkn=, and |editor-maskn=
  2714. elseif 2 == selected then
  2715. NameListFormat = 'vanc'; -- override whatever |name-list-format= might be
  2716. e, editor_etal = parse_vauthors_veditors (args, args.veditors, 'EditorList'); -- fetch editor list from |veditors=, |editor-linkn=, and |editor-maskn=
  2717. elseif 3 == selected then
  2718. Editors = A['Editors']; -- use content of |editors=
  2719. end
  2720. end
  2721.  
  2722. local t = {}; -- translators list from |translator-lastn= / translator-firstn= pairs
  2723. local Translators; -- assembled translators name list
  2724. t = extract_names (args, 'TranslatorList'); -- fetch translator list from |translatorn= / |translator-lastn=, -firstn=, -linkn=, -maskn=
  2725. local c = {}; -- contributors list from |contributor-lastn= / contributor-firstn= pairs
  2726. local Contributors; -- assembled contributors name list
  2727. local Contribution = A['Contribution'];
  2728. if in_array(config.CitationClass, {"book","citation"}) and not is_set(A['Periodical']) then -- |contributor= and |contribution= only supported in book cites
  2729. c = extract_names (args, 'ContributorList'); -- fetch contributor list from |contributorn= / |contributor-lastn=, -firstn=, -linkn=, -maskn=
  2730. if 0 < #c then
  2731. if not is_set (Contribution) then -- |contributor= requires |contribution=
  2732. table.insert( z.message_tail, { set_error( 'contributor_missing_required_param', 'contribution')}); -- add missing contribution error message
  2733. c = {}; -- blank the contributors' table; it is used as a flag later
  2734. end
  2735. if 0 == #a then -- |contributor= requires |author=
  2736. table.insert( z.message_tail, { set_error( 'contributor_missing_required_param', 'author')}); -- add missing author error message
  2737. c = {}; -- blank the contributors' table; it is used as a flag later
  2738. end
  2739. end
  2740. else -- if not a book cite
  2741. if select_one (args, cfg.aliases['ContributorList-Last'], 'redundant_parameters', 1 ) then -- are there contributor name list parameters?
  2742. table.insert( z.message_tail, { set_error( 'contributor_ignored')}); -- add contributor ignored error message
  2743. end
  2744. Contribution = nil; -- unset
  2745. end
  2746.  
  2747. if not is_valid_parameter_value (NameListFormat, 'name-list-format', cfg.keywords['name-list-format']) then -- only accepted value for this parameter is 'vanc'
  2748. NameListFormat = ''; -- anything else, set to empty string
  2749. end
  2750.  
  2751. local Year = A['Year'];
  2752. local PublicationDate = A['PublicationDate'];
  2753. local OrigYear = A['OrigYear'];
  2754. local Date = tidy_date( A['Date'] );
  2755. local LayDate = tidy_date( A['LayDate'] );
  2756. ------------------------------------------------- Get title data
  2757. local Title = A['Title'];
  2758. local ScriptTitle = A['ScriptTitle'];
  2759. local BookTitle = A['BookTitle'];
  2760. local Conference = A['Conference'];
  2761. local TransTitle = A['TransTitle'];
  2762. local TitleNote = A['TitleNote'];
  2763. local TitleLink = A['TitleLink'];
  2764. if is_set (TitleLink) and false == link_param_ok (TitleLink) then
  2765. table.insert( z.message_tail, { set_error( 'bad_paramlink', A:ORIGIN('TitleLink'))}); -- url or wikilink in |title-link=;
  2766. end
  2767.  
  2768. local Chapter = A['Chapter'];
  2769. local ScriptChapter = A['ScriptChapter'];
  2770. local ChapterLink -- = A['ChapterLink']; -- deprecated as a parameter but still used internally by cite episode
  2771. local TransChapter = A['TransChapter'];
  2772. local TitleType = A['TitleType'];
  2773. local Degree = A['Degree'];
  2774. local Docket = A['Docket'];
  2775. local ArchiveFormat = A['ArchiveFormat'];
  2776. local ArchiveURL = A['ArchiveURL'];
  2777. local URL = A['URL']
  2778. local URLorigin = A:ORIGIN('URL'); -- get name of parameter that holds URL
  2779. local ChapterURL = A['ChapterURL'];
  2780. local ChapterURLorigin = A:ORIGIN('ChapterURL'); -- get name of parameter that holds ChapterURL
  2781. local ConferenceFormat = A['ConferenceFormat'];
  2782. local ConferenceURL = A['ConferenceURL'];
  2783. local ConferenceURLorigin = A:ORIGIN('ConferenceURL'); -- get name of parameter that holds ConferenceURL
  2784. local Periodical = A['Periodical'];
  2785. local Periodical_origin = A:ORIGIN('Periodical'); -- get the name of the periodical parameter
  2786.  
  2787. local Series = A['Series'];
  2788. local Volume;
  2789. local Issue;
  2790. local Page;
  2791. local Pages;
  2792. local At;
  2793.  
  2794. if in_array (config.CitationClass, cfg.templates_using_volume) and not ('conference' == config.CitationClass and not is_set (Periodical)) then
  2795. Volume = A['Volume'];
  2796. end
  2797. if in_array (config.CitationClass, cfg.templates_using_issue) and not (in_array (config.CitationClass, {'conference', 'map'}) and not is_set (Periodical))then
  2798. Issue = A['Issue'];
  2799. end
  2800. local Position = '';
  2801. if not in_array (config.CitationClass, cfg.templates_not_using_page) then
  2802. Page = A['Page'];
  2803. Pages = hyphen_to_dash( A['Pages'] );
  2804. At = A['At'];
  2805. end
  2806.  
  2807. local Edition = A['Edition'];
  2808. local PublicationPlace = A['PublicationPlace']
  2809. local Place = A['Place'];
  2810. local PublisherName = A['PublisherName'];
  2811. local RegistrationRequired = A['RegistrationRequired'];
  2812. if not is_valid_parameter_value (RegistrationRequired, 'registration', cfg.keywords ['yes_true_y']) then
  2813. RegistrationRequired=nil;
  2814. end
  2815. local SubscriptionRequired = A['SubscriptionRequired'];
  2816. if not is_valid_parameter_value (SubscriptionRequired, 'subscription', cfg.keywords ['yes_true_y']) then
  2817. SubscriptionRequired=nil;
  2818. end
  2819.  
  2820. local Via = A['Via'];
  2821. local AccessDate = tidy_date( A['AccessDate'] );
  2822. local ArchiveDate = tidy_date( A['ArchiveDate'] );
  2823. local Agency = A['Agency'];
  2824. local DeadURL = A['DeadURL']
  2825. if not is_valid_parameter_value (DeadURL, 'dead-url', cfg.keywords ['deadurl']) then -- set in config.defaults to 'yes'
  2826. DeadURL = ''; -- anything else, set to empty string
  2827. end
  2828.  
  2829. local Language = A['Language'];
  2830. local Format = A['Format'];
  2831. local ChapterFormat = A['ChapterFormat'];
  2832. local DoiBroken = A['DoiBroken'];
  2833. local ID = A['ID'];
  2834. local ASINTLD = A['ASINTLD'];
  2835. local IgnoreISBN = A['IgnoreISBN'];
  2836. if not is_valid_parameter_value (IgnoreISBN, 'ignore-isbn-error', cfg.keywords ['yes_true_y']) then
  2837. IgnoreISBN = nil; -- anything else, set to empty string
  2838. end
  2839. local Embargo = A['Embargo'];
  2840. local Class = A['Class']; -- arxiv class identifier
  2841.  
  2842. local ID_list = extract_ids( args );
  2843.  
  2844. local Quote = A['Quote'];
  2845.  
  2846. local LayFormat = A['LayFormat'];
  2847. local LayURL = A['LayURL'];
  2848. local LaySource = A['LaySource'];
  2849. local Transcript = A['Transcript'];
  2850. local TranscriptFormat = A['TranscriptFormat'];
  2851. local TranscriptURL = A['TranscriptURL']
  2852. local TranscriptURLorigin = A:ORIGIN('TranscriptURL'); -- get name of parameter that holds TranscriptURL
  2853.  
  2854. local LastAuthorAmp = A['LastAuthorAmp'];
  2855. if not is_valid_parameter_value (LastAuthorAmp, 'last-author-amp', cfg.keywords ['yes_true_y']) then
  2856. LastAuthorAmp = nil; -- set to empty string
  2857. end
  2858. local no_tracking_cats = A['NoTracking'];
  2859. if not is_valid_parameter_value (no_tracking_cats, 'no-tracking', cfg.keywords ['yes_true_y']) then
  2860. no_tracking_cats = nil; -- set to empty string
  2861. end
  2862.  
  2863. --these are used by cite interview
  2864. local Callsign = A['Callsign'];
  2865. local City = A['City'];
  2866. local Program = A['Program'];
  2867.  
  2868. --local variables that are not cs1 parameters
  2869. local use_lowercase; -- controls capitalization of certain static text
  2870. local this_page = mw.title.getCurrentTitle(); -- also used for COinS and for language
  2871. local anchor_year; -- used in the CITEREF identifier
  2872. local COinS_date = {}; -- holds date info extracted from |date= for the COinS metadata by Module:Date verification
  2873.  
  2874. -- set default parameter values defined by |mode= parameter. If |mode= is empty or omitted, use CitationClass to set these values
  2875. local Mode = A['Mode'];
  2876. if not is_valid_parameter_value (Mode, 'mode', cfg.keywords['mode']) then
  2877. Mode = '';
  2878. end
  2879. local sepc; -- separator between citation elements for CS1 a period, for CS2, a comma
  2880. local PostScript;
  2881. local Ref;
  2882. sepc, PostScript, Ref = set_style (Mode:lower(), A['PostScript'], A['Ref'], config.CitationClass);
  2883. use_lowercase = ( sepc == ',' ); -- used to control capitalization for certain static text
  2884.  
  2885. --check this page to see if it is in one of the namespaces that cs1 is not supposed to add to the error categories
  2886. if not is_set (no_tracking_cats) then -- ignore if we are already not going to categorize this page
  2887. if in_array (this_page.nsText, cfg.uncategorized_namespaces) then
  2888. no_tracking_cats = "true"; -- set no_tracking_cats
  2889. end
  2890. for _,v in ipairs (cfg.uncategorized_subpages) do -- cycle through page name patterns
  2891. if this_page.text:match (v) then -- test page name against each pattern
  2892. no_tracking_cats = "true"; -- set no_tracking_cats
  2893. break; -- bail out if one is found
  2894. end
  2895. end
  2896. end
  2897.  
  2898. -- check for extra |page=, |pages= or |at= parameters. (also sheet and sheets while we're at it)
  2899. select_one( args, {'page', 'p', 'pp', 'pages', 'at', 'sheet', 'sheets'}, 'redundant_parameters' ); -- this is a dummy call simply to get the error message and category
  2900.  
  2901. local NoPP = A['NoPP']
  2902. if is_set (NoPP) and is_valid_parameter_value (NoPP, 'nopp', cfg.keywords ['yes_true_y']) then
  2903. NoPP = true;
  2904. else
  2905. NoPP = nil; -- unset, used as a flag later
  2906. end
  2907.  
  2908. if is_set(Page) then
  2909. if is_set(Pages) or is_set(At) then
  2910. Pages = ''; -- unset the others
  2911. At = '';
  2912. end
  2913. extra_text_in_page_check (Page); -- add this page to maint cat if |page= value begins with what looks like p. or pp.
  2914. elseif is_set(Pages) then
  2915. if is_set(At) then
  2916. At = ''; -- unset
  2917. end
  2918. extra_text_in_page_check (Pages); -- add this page to maint cat if |pages= value begins with what looks like p. or pp.
  2919. end
  2920.  
  2921. -- both |publication-place= and |place= (|location=) allowed if different
  2922. if not is_set(PublicationPlace) and is_set(Place) then
  2923. PublicationPlace = Place; -- promote |place= (|location=) to |publication-place
  2924. end
  2925. if PublicationPlace == Place then Place = ''; end -- don't need both if they are the same
  2926. --[[
  2927. Parameter remapping for cite encyclopedia:
  2928. When the citation has these parameters:
  2929. |encyclopedia and |title then map |title to |article and |encyclopedia to |title
  2930. |encyclopedia and |article then map |encyclopedia to |title
  2931. |encyclopedia then map |encyclopedia to |title
  2932.  
  2933. |trans_title maps to |trans_chapter when |title is re-mapped
  2934. |url maps to |chapterurl when |title is remapped
  2935.  
  2936. All other combinations of |encyclopedia, |title, and |article are not modified
  2937.  
  2938. ]]
  2939.  
  2940. local Encyclopedia = A['Encyclopedia'];
  2941.  
  2942. if ( config.CitationClass == "encyclopaedia" ) or ( config.CitationClass == "citation" and is_set (Encyclopedia)) then -- test code for citation
  2943. if is_set(Periodical) then -- Periodical is set when |encyclopedia is set
  2944. if is_set(Title) or is_set (ScriptTitle) then
  2945. if not is_set(Chapter) then
  2946. Chapter = Title; -- |encyclopedia and |title are set so map |title to |article and |encyclopedia to |title
  2947. ScriptChapter = ScriptTitle;
  2948. TransChapter = TransTitle;
  2949. ChapterURL = URL;
  2950. if not is_set (ChapterURL) and is_set (TitleLink) then
  2951. Chapter= '[[' .. TitleLink .. '|' .. Chapter .. ']]';
  2952. end
  2953. Title = Periodical;
  2954. ChapterFormat = Format;
  2955. Periodical = ''; -- redundant so unset
  2956. TransTitle = '';
  2957. URL = '';
  2958. Format = '';
  2959. TitleLink = '';
  2960. ScriptTitle = '';
  2961. end
  2962. else -- |title not set
  2963. Title = Periodical; -- |encyclopedia set and |article set or not set so map |encyclopedia to |title
  2964. Periodical = ''; -- redundant so unset
  2965. end
  2966. end
  2967. end
  2968.  
  2969. -- Special case for cite techreport.
  2970. if (config.CitationClass == "techreport") then -- special case for cite techreport
  2971. if is_set(A['Number']) then -- cite techreport uses 'number', which other citations alias to 'issue'
  2972. if not is_set(ID) then -- can we use ID for the "number"?
  2973. ID = A['Number']; -- yes, use it
  2974. else -- ID has a value so emit error message
  2975. table.insert( z.message_tail, { set_error('redundant_parameters', {wrap_style ('parameter', 'id') .. ' and ' .. wrap_style ('parameter', 'number')}, true )});
  2976. end
  2977. end
  2978. end
  2979.  
  2980. -- special case for cite interview
  2981. if (config.CitationClass == "interview") then
  2982. if is_set(Program) then
  2983. ID = ' ' .. Program;
  2984. end
  2985. if is_set(Callsign) then
  2986. if is_set(ID) then
  2987. ID = ID .. sepc .. ' ' .. Callsign;
  2988. else
  2989. ID = ' ' .. Callsign;
  2990. end
  2991. end
  2992. if is_set(City) then
  2993. if is_set(ID) then
  2994. ID = ID .. sepc .. ' ' .. City;
  2995. else
  2996. ID = ' ' .. City;
  2997. end
  2998. end
  2999.  
  3000. if is_set(Others) then
  3001. if is_set(TitleType) then
  3002. Others = ' ' .. TitleType .. ' with ' .. Others;
  3003. TitleType = '';
  3004. else
  3005. Others = ' ' .. 'Interview with ' .. Others;
  3006. end
  3007. else
  3008. Others = '(Interview)';
  3009. end
  3010. end
  3011.  
  3012. -- special case for cite mailing list
  3013. if (config.CitationClass == "mailinglist") then
  3014. Periodical = A ['MailingList'];
  3015. elseif 'mailinglist' == A:ORIGIN('Periodical') then
  3016. Periodical = ''; -- unset because mailing list is only used for cite mailing list
  3017. end
  3018.  
  3019. -- Account for the oddity that is {{cite conference}}, before generation of COinS data.
  3020. if 'conference' == config.CitationClass then
  3021. if is_set(BookTitle) then
  3022. Chapter = Title;
  3023. -- ChapterLink = TitleLink; -- |chapterlink= is deprecated
  3024. ChapterURL = URL;
  3025. ChapterURLorigin = URLorigin;
  3026. URLorigin = '';
  3027. ChapterFormat = Format;
  3028. TransChapter = TransTitle;
  3029. Title = BookTitle;
  3030. Format = '';
  3031. -- TitleLink = '';
  3032. TransTitle = '';
  3033. URL = '';
  3034. end
  3035. elseif 'speech' ~= config.CitationClass then
  3036. Conference = ''; -- not cite conference or cite speech so make sure this is empty string
  3037. end
  3038.  
  3039. -- cite map oddities
  3040. local Cartography = "";
  3041. local Scale = "";
  3042. local Sheet = A['Sheet'] or '';
  3043. local Sheets = A['Sheets'] or '';
  3044. if config.CitationClass == "map" then
  3045. Chapter = A['Map'];
  3046. ChapterURL = A['MapURL'];
  3047. TransChapter = A['TransMap'];
  3048. ChapterURLorigin = A:ORIGIN('MapURL');
  3049. ChapterFormat = A['MapFormat'];
  3050. Cartography = A['Cartography'];
  3051. if is_set( Cartography ) then
  3052. Cartography = sepc .. " " .. wrap_msg ('cartography', Cartography, use_lowercase);
  3053. end
  3054. Scale = A['Scale'];
  3055. if is_set( Scale ) then
  3056. Scale = sepc .. " " .. Scale;
  3057. end
  3058. end
  3059.  
  3060. -- Account for the oddities that are {{cite episode}} and {{cite serial}}, before generation of COinS data.
  3061. if 'episode' == config.CitationClass or 'serial' == config.CitationClass then
  3062. local AirDate = A['AirDate'];
  3063. local SeriesLink = A['SeriesLink'];
  3064. if is_set (SeriesLink) and false == link_param_ok (SeriesLink) then
  3065. table.insert( z.message_tail, { set_error( 'bad_paramlink', A:ORIGIN('SeriesLink'))});
  3066. end
  3067. local Network = A['Network'];
  3068. local Station = A['Station'];
  3069. local s, n = {}, {};
  3070. -- do common parameters first
  3071. if is_set(Network) then table.insert(n, Network); end
  3072. if is_set(Station) then table.insert(n, Station); end
  3073. ID = table.concat(n, sepc .. ' ');
  3074. if not is_set (Date) and is_set (AirDate) then -- promote airdate to date
  3075. Date = AirDate;
  3076. end
  3077.  
  3078. if 'episode' == config.CitationClass then -- handle the oddities that are strictly {{cite episode}}
  3079. local Season = A['Season'];
  3080. local SeriesNumber = A['SeriesNumber'];
  3081.  
  3082. if is_set (Season) and is_set (SeriesNumber) then -- these are mutually exclusive so if both are set
  3083. table.insert( z.message_tail, { set_error( 'redundant_parameters', {wrap_style ('parameter', 'season') .. ' and ' .. wrap_style ('parameter', 'seriesno')}, true ) } ); -- add error message
  3084. SeriesNumber = ''; -- unset; prefer |season= over |seriesno=
  3085. end
  3086. -- assemble a table of parts concatenated later into Series
  3087. if is_set(Season) then table.insert(s, wrap_msg ('season', Season, use_lowercase)); end
  3088. if is_set(SeriesNumber) then table.insert(s, wrap_msg ('series', SeriesNumber, use_lowercase)); end
  3089. if is_set(Issue) then table.insert(s, wrap_msg ('episode', Issue, use_lowercase)); end
  3090. Issue = ''; -- unset because this is not a unique parameter
  3091. Chapter = Title; -- promote title parameters to chapter
  3092. ScriptChapter = ScriptTitle;
  3093. ChapterLink = TitleLink; -- alias episodelink
  3094. TransChapter = TransTitle;
  3095. ChapterURL = URL;
  3096. ChapterURLorigin = A:ORIGIN('URL');
  3097. Title = Series; -- promote series to title
  3098. TitleLink = SeriesLink;
  3099. Series = table.concat(s, sepc .. ' '); -- this is concatenation of season, seriesno, episode number
  3100.  
  3101. if is_set (ChapterLink) and not is_set (ChapterURL) then -- link but not URL
  3102. Chapter = '[[' .. ChapterLink .. '|' .. Chapter .. ']]'; -- ok to wikilink
  3103. elseif is_set (ChapterLink) and is_set (ChapterURL) then -- if both are set, URL links episode;
  3104. Series = '[[' .. ChapterLink .. '|' .. Series .. ']]'; -- series links with ChapterLink (episodelink -> TitleLink -> ChapterLink) ugly
  3105. end
  3106. URL = ''; -- unset
  3107. TransTitle = '';
  3108. ScriptTitle = '';
  3109. else -- now oddities that are cite serial
  3110. Issue = ''; -- unset because this parameter no longer supported by the citation/core version of cite serial
  3111. Chapter = A['Episode']; -- TODO: make |episode= available to cite episode someday?
  3112. if is_set (Series) and is_set (SeriesLink) then
  3113. Series = '[[' .. SeriesLink .. '|' .. Series .. ']]';
  3114. end
  3115. Series = wrap_style ('italic-title', Series); -- series is italicized
  3116. end
  3117. end
  3118. -- end of {{cite episode}} stuff
  3119.  
  3120. -- Account for the oddities that are {{cite arxiv}}, before generation of COinS data.
  3121. if 'arxiv' == config.CitationClass then
  3122. if not is_set (ID_list['ARXIV']) then -- |arxiv= or |eprint= required for cite arxiv
  3123. table.insert( z.message_tail, { set_error( 'arxiv_missing', {}, true ) } ); -- add error message
  3124. elseif is_set (Series) then -- series is an alias of version
  3125. ID_list['ARXIV'] = ID_list['ARXIV'] .. Series; -- concatenate version onto the end of the arxiv identifier
  3126. Series = ''; -- unset
  3127. deprecated_parameter ('version'); -- deprecated parameter but only for cite arxiv
  3128. end
  3129. if first_set ({AccessDate, At, Chapter, Format, Page, Pages, Periodical, PublisherName, URL, -- a crude list of parameters that are not supported by cite arxiv
  3130. ID_list['ASIN'], ID_list['BIBCODE'], ID_list['DOI'], ID_list['ISBN'], ID_list['ISSN'],
  3131. ID_list['JFM'], ID_list['JSTOR'], ID_list['LCCN'], ID_list['MR'], ID_list['OCLC'], ID_list['OL'],
  3132. ID_list['OSTI'], ID_list['PMC'], ID_list['PMID'], ID_list['RFC'], ID_list['SSRN'], ID_list['USENETID'], ID_list['ZBL']},27) then
  3133. table.insert( z.message_tail, { set_error( 'arxiv_params_not_supported', {}, true ) } ); -- add error message
  3134.  
  3135. AccessDate= ''; -- set these to empty string; not supported in cite arXiv
  3136. PublisherName = ''; -- (if the article has been published, use cite journal, or other)
  3137. Chapter = '';
  3138. URL = '';
  3139. Format = '';
  3140. Page = ''; Pages = ''; At = '';
  3141. end
  3142. Periodical = 'arXiv'; -- set to arXiv for COinS; after that, must be set to empty string
  3143. end
  3144.  
  3145. -- handle type parameter for those CS1 citations that have default values
  3146. if in_array(config.CitationClass, {"AV-media-notes", "DVD-notes", "mailinglist", "map", "podcast", "pressrelease", "report", "techreport", "thesis"}) then
  3147. TitleType = set_titletype (config.CitationClass, TitleType);
  3148. if is_set(Degree) and "Thesis" == TitleType then -- special case for cite thesis
  3149. TitleType = Degree .. "论文";
  3150. end
  3151. end
  3152.  
  3153. if is_set(TitleType) then -- if type parameter is specified
  3154. TitleType = substitute( cfg.messages['type'], TitleType); -- display it in parentheses
  3155. end
  3156.  
  3157. -- legacy: promote |year= to Date when Date is not set; or, promote PublicationDate to Date when neither Date nor Year is set.
  3158. if not is_set (Date) then
  3159. Date = Year; -- promote Year to Date
  3160. Year = nil; -- make nil so Year as empty string isn't used for CITEREF
  3161. if not is_set (Date) and is_set(PublicationDate) then -- use PublicationDate when |date= and |year= are not set
  3162. Date = PublicationDate; -- promote PublicationDate to Date
  3163. PublicationDate = ''; -- unset, no longer needed
  3164. end
  3165. end
  3166.  
  3167. if PublicationDate == Date then PublicationDate = ''; end -- if PublicationDate is same as Date, don't display in rendered citation
  3168.  
  3169. --[[
  3170. Go test all of the date-holding parameters for valid MOS:DATE format and make sure that dates are real dates. This must be done before we do COinS because here is where
  3171. we get the date used in the metadata.
  3172.  
  3173. Date validation supporting code is in Module:Citation/CS1/Date_validation
  3174. ]]
  3175. do -- create defined block to contain local variables error_message and mismatch
  3176. local error_message = '';
  3177. -- AirDate has been promoted to Date so not necessary to check it
  3178. anchor_year, error_message = dates({['access-date']=AccessDate, ['archive-date']=ArchiveDate, ['date']=Date, ['doi-broken-date']=DoiBroken,
  3179. ['embargo']=Embargo, ['lay-date']=LayDate, ['publication-date']=PublicationDate, ['year']=Year}, COinS_date);
  3180.  
  3181. if is_set (Year) and is_set (Date) then -- both |date= and |year= not normally needed;
  3182. local mismatch = year_date_check (Year, Date)
  3183. if 0 == mismatch then -- |year= does not match a year-value in |date=
  3184. if is_set (error_message) then -- if there is already an error message
  3185. error_message = error_message .. ', '; -- tack on this additional message
  3186. end
  3187. error_message = error_message .. '&#124;year= / &#124;date= mismatch';
  3188. elseif 1 == mismatch then -- |year= matches year-value in |date=
  3189. add_maint_cat ('date_year');
  3190. end
  3191. end
  3192.  
  3193. if is_set(error_message) then
  3194. table.insert( z.message_tail, { set_error( 'bad_date', {error_message}, true ) } ); -- add this error message
  3195. end
  3196. end -- end of do
  3197.  
  3198. -- Account for the oddity that is {{cite journal}} with |pmc= set and |url= not set. Do this after date check but before COinS.
  3199. -- Here we unset Embargo if PMC not embargoed (|embargo= not set in the citation) or if the embargo time has expired. Otherwise, holds embargo date
  3200. Embargo = is_embargoed (Embargo); --
  3201.  
  3202. if config.CitationClass == "journal" and not is_set(URL) and is_set(ID_list['PMC']) then
  3203. if not is_set (Embargo) then -- if not embargoed or embargo has expired
  3204. URL=cfg.id_handlers['PMC'].prefix .. ID_list['PMC']; -- set url to be the same as the PMC external link if not embargoed
  3205. URLorigin = cfg.id_handlers['PMC'].parameters[1]; -- set URLorigin to parameter name for use in error message if citation is missing a |title=
  3206. end
  3207. end
  3208.  
  3209. -- At this point fields may be nil if they weren't specified in the template use. We can use that fact.
  3210. -- Test if citation has no title
  3211. if not is_set(Title) and
  3212. not is_set(TransTitle) and
  3213. not is_set(ScriptTitle) then
  3214. if 'episode' == config.CitationClass then -- special case for cite episode; TODO: is there a better way to do this?
  3215. table.insert( z.message_tail, { set_error( 'citation_missing_title', {'series'}, true ) } );
  3216. else
  3217. table.insert( z.message_tail, { set_error( 'citation_missing_title', {'title'}, true ) } );
  3218. end
  3219. end
  3220. if 'none' == Title and in_array (config.CitationClass, {'journal', 'citation'}) and is_set (Periodical) and 'journal' == A:ORIGIN('Periodical') then -- special case for journal cites
  3221. Title = ''; -- set title to empty string
  3222. add_maint_cat ('untitled');
  3223. end
  3224.  
  3225. check_for_url ({ -- add error message when any of these parameters contains a URL
  3226. ['title']=Title,
  3227. [A:ORIGIN('Chapter')]=Chapter,
  3228. [A:ORIGIN('Periodical')]=Periodical,
  3229. [A:ORIGIN('PublisherName')] = PublisherName,
  3230. });
  3231.  
  3232. -- COinS metadata (see <http://ocoins.info/>) for automated parsing of citation information.
  3233. -- handle the oddity that is cite encyclopedia and {{citation |encyclopedia=something}}. Here we presume that
  3234. -- when Periodical, Title, and Chapter are all set, then Periodical is the book (encyclopedia) title, Title
  3235. -- is the article title, and Chapter is a section within the article. So, we remap
  3236. local coins_chapter = Chapter; -- default assuming that remapping not required
  3237. local coins_title = Title; -- et tu
  3238. if 'encyclopaedia' == config.CitationClass or ('citation' == config.CitationClass and is_set (Encyclopedia)) then
  3239. if is_set (Chapter) and is_set (Title) and is_set (Periodical) then -- if all are used then
  3240. coins_chapter = Title; -- remap
  3241. coins_title = Periodical;
  3242. end
  3243. end
  3244. local coins_author = a; -- default for coins rft.au
  3245. if 0 < #c then -- but if contributor list
  3246. coins_author = c; -- use that instead
  3247. end
  3248.  
  3249. -- this is the function call to COinS()
  3250. local OCinSoutput = COinS({
  3251. ['Periodical'] = Periodical,
  3252. ['Encyclopedia'] = Encyclopedia,
  3253. ['Chapter'] = make_coins_title (coins_chapter, ScriptChapter), -- Chapter and ScriptChapter stripped of bold / italic wikimarkup
  3254. ['Map'] = Map,
  3255. ['Degree'] = Degree; -- cite thesis only
  3256. ['Title'] = make_coins_title (coins_title, ScriptTitle), -- Title and ScriptTitle stripped of bold / italic wikimarkup
  3257. ['PublicationPlace'] = PublicationPlace,
  3258. ['Date'] = COinS_date.rftdate, -- COinS_date has correctly formatted date if Date is valid;
  3259. ['Season'] = COinS_date.rftssn,
  3260. ['Chron'] = COinS_date.rftchron or (not COinS_date.rftdate and Date) or '', -- chron but if not set and invalid date format use Date; keep this last bit?
  3261. ['Series'] = Series,
  3262. ['Volume'] = Volume,
  3263. ['Issue'] = Issue,
  3264. ['Pages'] = get_coins_pages (first_set ({Sheet, Sheets, Page, Pages, At}, 5)), -- pages stripped of external links
  3265. ['Edition'] = Edition,
  3266. ['PublisherName'] = PublisherName,
  3267. ['URL'] = first_set ({ChapterURL, URL}, 2),
  3268. ['Authors'] = coins_author,
  3269. ['ID_list'] = ID_list,
  3270. ['RawPage'] = this_page.prefixedText,
  3271. }, config.CitationClass);
  3272.  
  3273. -- Account for the oddities that are {{cite arxiv}}, AFTER generation of COinS data.
  3274. if 'arxiv' == config.CitationClass then -- we have set rft.jtitle in COinS to arXiv, now unset so it isn't displayed
  3275. Periodical = ''; -- periodical not allowed in cite arxiv; if article has been published, use cite journal
  3276. end
  3277.  
  3278. -- special case for cite newsgroup. Do this after COinS because we are modifying Publishername to include some static text
  3279. if 'newsgroup' == config.CitationClass then
  3280. if is_set (PublisherName) then
  3281. PublisherName = substitute (cfg.messages['newsgroup'], external_link( 'news:' .. PublisherName, PublisherName, A:ORIGIN('PublisherName') ));
  3282. end
  3283. end
  3284.  
  3285.  
  3286.  
  3287. -- Now perform various field substitutions.
  3288. -- We also add leading spaces and surrounding markup and punctuation to the
  3289. -- various parts of the citation, but only when they are non-nil.
  3290. local EditorCount; -- used only for choosing (ed.) or (eds.) annotation at end of editor name-list
  3291. do
  3292. local last_first_list;
  3293. local maximum;
  3294. local control = {
  3295. format = NameListFormat, -- empty string or 'vanc'
  3296. maximum = nil, -- as if display-authors or display-editors not set
  3297. lastauthoramp = LastAuthorAmp,
  3298. page_name = this_page.text -- get current page name so that we don't wikilink to it via editorlinkn
  3299. };
  3300.  
  3301. do -- do editor name list first because coauthors can modify control table
  3302. maximum , editor_etal = get_display_authors_editors (A['DisplayEditors'], #e, 'editors', editor_etal);
  3303. --[[ Preserve old-style implicit et al.
  3304. Temporary fix for the "Category:含有旧式缩略标签的引用的页面 in editors" problem (the category for pages containing citations with old-style abbreviation tags): the Chinese version's logic currently differs from the English version, so this category is not needed for now. Revisit how to handle this at the next update. --2017.6.23 shizhao
  3305. if not is_set(maximum) and #e == 4 then
  3306. maximum = 3;
  3307. table.insert( z.message_tail, { set_error('implict_etal_editor', {}, true) } );
  3308. end
  3309. ]]
  3310.  
  3311. control.maximum = maximum;
  3312. last_first_list, EditorCount = list_people(control, e, editor_etal, 'editor');
  3313.  
  3314. if is_set (Editors) then
  3315. if editor_etal then
  3316. Editors = Editors .. ' ' .. cfg.messages['et al']; -- add et al. to editors parameter because |display-editors=etal
  3317. EditorCount = 2; -- with et al., |editors= is multiple names; spoof to display (eds.) annotation
  3318. else
  3319. EditorCount = 2; -- we don't know but assume |editors= is multiple names; spoof to display (eds.) annotation
  3320. end
  3321. else
  3322. Editors = last_first_list; -- either an editor name list or an empty string
  3323. end
  3324.  
  3325. if 1 == EditorCount and (true == editor_etal or 1 < #e) then -- only one editor displayed but et al. applies or more than one editor was supplied
  3326. EditorCount = 2; -- spoof to display (eds.) annotation
  3327. end
  3328. end
  3329. do -- now do translators
  3330. control.maximum = #t; -- number of translators
  3331. Translators = list_people(control, t, false, 'translator'); -- et al not currently supported
  3332. end
  3333. do -- now do contributors
  3334. control.maximum = #c; -- number of contributors
  3335. Contributors = list_people(control, c, false, 'contributor'); -- et al not currently supported
  3336. end
  3337. do -- now do authors
  3338. control.maximum , author_etal = get_display_authors_editors (A['DisplayAuthors'], #a, 'authors', author_etal);
  3339.  
  3340. if is_set(Coauthors) then -- if the coauthor field is also used, prevent ampersand and et al. formatting.
  3341. control.lastauthoramp = nil;
  3342. control.maximum = #a + 1;
  3343. end
  3344. last_first_list = list_people(control, a, author_etal, 'author');
  3345.  
  3346. if is_set (Authors) then
  3347. Authors, author_etal = name_has_etal (Authors, author_etal, false); -- find and remove variations on et al.
  3348. if author_etal then
  3349. Authors = Authors .. ' ' .. cfg.messages['et al']; -- add et al. to authors parameter
  3350. end
  3351. else
  3352. Authors = last_first_list; -- either an author name list or an empty string
  3353. end
  3354. end -- end of do
  3355.  
  3356. if not is_set(Authors) and is_set(Coauthors) then -- coauthors aren't displayed if one of authors=, authorn=, or lastn= isn't specified
  3357. table.insert( z.message_tail, { set_error('coauthors_missing_author', {}, true) } ); -- emit error message
  3358. end
  3359. end
  3360.  
  3361. -- apply |[xx-]format= styling; at the end, these parameters hold correctly styled format annotation,
  3362. -- an error message if the associated url is not set, or an empty string for concatenation
  3363. ArchiveFormat = style_format (ArchiveFormat, ArchiveURL, 'archive-format', 'archive-url');
  3364. ConferenceFormat = style_format (ConferenceFormat, ConferenceURL, 'conference-format', 'conference-url');
  3365. Format = style_format (Format, URL, 'format', 'url');
  3366. LayFormat = style_format (LayFormat, LayURL, 'lay-format', 'lay-url');
  3367. TranscriptFormat = style_format (TranscriptFormat, TranscriptURL, 'transcript-format', 'transcripturl');
  3368.  
  3369. -- special case for chapter format so no error message or cat when chapter not supported
  3370. if not (in_array(config.CitationClass, {'web','news','journal', 'magazine', 'pressrelease','podcast', 'newsgroup', 'arxiv'}) or
  3371. ('citation' == config.CitationClass and is_set (Periodical) and not is_set (Encyclopedia))) then
  3372. ChapterFormat = style_format (ChapterFormat, ChapterURL, 'chapter-format', 'chapter-url');
  3373. end
  3374.  
  3375. if not is_set(URL) then -- |url= missing or empty; the checks below apply only in that case
  3376. if in_array(config.CitationClass, {"web","podcast", "mailinglist"}) then -- Test if cite web, cite podcast, or cite mailinglist |url= is missing or empty
  3377. table.insert( z.message_tail, { set_error( 'cite_web_url', {}, true ) } );
  3378. end
  3379. -- Test if accessdate is given without giving a URL
  3380. if is_set(AccessDate) and not is_set(ChapterURL)then -- ChapterURL may be set when the others are not set; TODO: move this to a separate test?
  3381. table.insert( z.message_tail, { set_error( 'accessdate_missing_url', {}, true ) } );
  3382. AccessDate = ''; -- unset so that the later access-date formatting is skipped
  3383. end
  3384. end
  3385.  
  3386. local OriginalURL, OriginalURLorigin, OriginalFormat; -- TODO: swap chapter and title here so that archive applies to most specific if both are set?
  3387. DeadURL = DeadURL:lower(); -- used later when assembling archived text
  3388. if is_set( ArchiveURL ) then -- remember the source url / format, and unless DeadURL is 'no', link the archive copy in its place
  3389. if is_set (URL) then
  3390. OriginalURL = URL; -- save copy of original source URL
  3391. OriginalURLorigin = URLorigin; -- name of url parameter for error messages
  3392. OriginalFormat = Format; -- and original |format=
  3393. if 'no' ~= DeadURL then -- if URL set then archive-url applies to it
  3394. URL = ArchiveURL -- swap-in the archive's url
  3395. URLorigin = A:ORIGIN('ArchiveURL') -- name of archive url parameter for error messages
  3396. Format = ArchiveFormat or ''; -- swap in archive's format
  3397. end
  3398. elseif is_set (ChapterURL) then -- URL not set so if chapter-url is set apply archive url to it
  3399. OriginalURL = ChapterURL; -- save copy of source chapter's url for archive text
  3400. OriginalURLorigin = ChapterURLorigin; -- name of chapter-url parameter for error messages
  3401. OriginalFormat = ChapterFormat; -- and original chapter format
  3402. if 'no' ~= DeadURL then -- same rule as for URL: the archive replaces the chapter link unless DeadURL is 'no'
  3403. ChapterURL = ArchiveURL -- swap-in the archive's url
  3404. ChapterURLorigin = A:ORIGIN('ArchiveURL') -- name of archive-url parameter for error messages
  3405. ChapterFormat = ArchiveFormat or ''; -- swap in archive's format
  3406. end
  3407. end
  3408. end
  3409.  
  3410. if in_array(config.CitationClass, {'web','news','journal', 'magazine', 'pressrelease','podcast', 'newsgroup', 'arxiv'}) or -- if any of the 'periodical' cites except encyclopedia
  3411. ('citation' == config.CitationClass and is_set (Periodical) and not is_set (Encyclopedia)) then
  3412. local chap_param; -- name of the first chapter-related parameter that is set in this periodical-style cite; stays nil when none is set
  3413. if is_set (Chapter) then -- get a parameter name from one of these chapter related meta-parameters
  3414. chap_param = A:ORIGIN ('Chapter')
  3415. elseif is_set (TransChapter) then
  3416. chap_param = A:ORIGIN ('TransChapter')
  3417. elseif is_set (ChapterURL) then
  3418. chap_param = A:ORIGIN ('ChapterURL')
  3419. elseif is_set (ScriptChapter) then
  3420. chap_param = A:ORIGIN ('ScriptChapter')
  3421. elseif is_set (ChapterFormat) then -- was a bare 'else' followed by an is_set() call whose result was discarded
  3422. chap_param = A:ORIGIN ('ChapterFormat')
  3423. end
  3424.  
  3425. if is_set (chap_param) then -- if we found one
  3426. table.insert( z.message_tail, { set_error( 'chapter_ignored', {chap_param}, true ) } ); -- add error message
  3427. Chapter = ''; -- and set them to empty string to be safe with concatenation
  3428. TransChapter = '';
  3429. ChapterURL = '';
  3430. ScriptChapter = '';
  3431. ChapterFormat = '';
  3432. end
  3433. else -- otherwise, format chapter / article title
  3434. local no_quotes = false; -- default assume that we will be quoting the chapter parameter value
  3435. if is_set (Contribution) and 0 < #c then -- if this is a contribution with contributor(s)
  3436. if in_array (Contribution:lower(), cfg.keywords.contribution) then -- and a generic contribution title
  3437. no_quotes = true; -- then render it unquoted
  3438. end
  3439. end
  3440.  
  3441. Chapter = format_chapter_title (ScriptChapter, Chapter, TransChapter, ChapterURL, ChapterURLorigin, no_quotes); -- Contribution is also in Chapter
  3442. if is_set (Chapter) then
  3443. if 'map' == config.CitationClass and is_set (TitleType) then
  3444. Chapter = Chapter .. ' ' .. TitleType;
  3445. end
  3446. Chapter = Chapter .. ChapterFormat .. sepc .. ' ';
  3447. elseif is_set (ChapterFormat) then -- |chapter= not set but |chapter-format= is so ...
  3448. Chapter = ChapterFormat .. sepc .. ' '; -- ... ChapterFormat has error message, we want to see it
  3449. end
  3450. end
  3451.  
  3452. -- Format main title.
  3453. if is_set(TitleLink) and is_set(Title) then
  3454. Title = "[[" .. TitleLink .. "|" .. Title .. "]]"
  3455. end
  3456.  
  3457. if in_array(config.CitationClass, {'web','news','journal', 'magazine', 'pressrelease','podcast', 'newsgroup', 'mailinglist', 'arxiv'}) or
  3458. ('citation' == config.CitationClass and is_set (Periodical) and not is_set (Encyclopedia)) or
  3459. ('map' == config.CitationClass and is_set (Periodical)) then -- special case for cite map when the map is in a periodical treat as an article
  3460. Title = kern_quotes (Title); -- if necessary, separate title's leading and trailing quote marks from Module provided quote marks
  3461. Title = wrap_style ('quoted-title', Title);
  3462. Title = script_concatenate (Title, ScriptTitle); -- <bdi> tags, lang attribute, categorization, etc; must be done after title is wrapped
  3463. TransTitle= wrap_style ('trans-quoted-title', TransTitle );
  3464. elseif 'report' == config.CitationClass then -- no styling for cite report
  3465. Title = script_concatenate (Title, ScriptTitle); -- <bdi> tags, lang attribute, categorization, etc; title itself is left unwrapped for cite report
  3466. TransTitle= wrap_style ('trans-quoted-title', TransTitle ); -- for cite report, use this form for trans-title
  3467. else
  3468. Title = wrap_style ('italic-title', Title);
  3469. Title = script_concatenate (Title, ScriptTitle); -- <bdi> tags, lang attribute, categorization, etc; must be done after title is wrapped
  3470. TransTitle = wrap_style ('trans-italic-title', TransTitle);
  3471. end
  3472.  
  3473. TransError = "";
  3474. if is_set(TransTitle) then
  3475. if is_set(Title) then
  3476. TransTitle = " " .. TransTitle;
  3477. else
  3478. TransError = " " .. set_error( 'trans_missing_title', {'title'} );
  3479. end
  3480. end
  3481. Title = Title .. TransTitle;
  3482. if is_set(Title) then
  3483. if not is_set(TitleLink) and is_set(URL) then
  3484. Title = external_link( URL, Title, URLorigin ) .. TransError .. Format;
  3485. URL = "";
  3486. Format = "";
  3487. else
  3488. Title = Title .. TransError;
  3489. end
  3490. end
  3491.  
  3492. if is_set(Place) then
  3493. Place = " " .. wrap_msg ('written', Place, use_lowercase) .. sepc .. " ";
  3494. end
  3495.  
  3496. if is_set (Conference) then
  3497. if is_set (ConferenceURL) then
  3498. Conference = external_link( ConferenceURL, Conference, ConferenceURLorigin );
  3499. end
  3500. Conference = sepc .. " " .. Conference .. ConferenceFormat;
  3501. elseif is_set(ConferenceURL) then
  3502. Conference = sepc .. " " .. external_link( ConferenceURL, nil, ConferenceURLorigin );
  3503. end
  3504.  
  3505. if not is_set(Position) then
  3506. local Minutes = A['Minutes'];
  3507. local Time = A['Time'];
  3508.  
  3509. if is_set(Minutes) then
  3510. if is_set (Time) then
  3511. table.insert( z.message_tail, { set_error( 'redundant_parameters', {wrap_style ('parameter', 'minutes') .. ' and ' .. wrap_style ('parameter', 'time')}, true ) } );
  3512. end
  3513. Position = " " .. Minutes .. " " .. cfg.messages['minutes'];
  3514. else
  3515. if is_set(Time) then
  3516. local TimeCaption = A['TimeCaption']
  3517. if not is_set(TimeCaption) then
  3518. TimeCaption = cfg.messages['event'];
  3519. if sepc ~= '.' then
  3520. TimeCaption = TimeCaption:lower();
  3521. end
  3522. end
  3523. Position = " " .. TimeCaption .. " " .. Time;
  3524. end
  3525. end
  3526. else
  3527. Position = " " .. Position;
  3528. At = '';
  3529. end
  3530.  
  3531. Page, Pages, Sheet, Sheets = format_pages_sheets (Page, Pages, Sheet, Sheets, config.CitationClass, Periodical_origin, sepc, NoPP, use_lowercase);
  3532.  
  3533. At = is_set(At) and (sepc .. " " .. At) or "";
  3534. Position = is_set(Position) and (sepc .. " " .. Position) or "";
  3535. if config.CitationClass == 'map' then
  3536. local Section = A['Section'];
  3537. local Sections = A['Sections'];
  3538. local Inset = A['Inset'];
  3539. if is_set( Inset ) then
  3540. Inset = sepc .. " " .. wrap_msg ('inset', Inset, use_lowercase);
  3541. end
  3542.  
  3543. if is_set( Sections ) then
  3544. Section = sepc .. " " .. wrap_msg ('sections', Sections, use_lowercase);
  3545. elseif is_set( Section ) then
  3546. Section = sepc .. " " .. wrap_msg ('section', Section, use_lowercase);
  3547. end
  3548. At = At .. Inset .. Section;
  3549. end
  3550.  
  3551. if is_set (Language) then
  3552. Language = language_parameter (Language); -- format, categories, name from ISO639-1, etc
  3553. else
  3554. Language=""; -- language not specified so make sure this is an empty string;
  3555. end
  3556.  
  3557. Others = is_set(Others) and (sepc .. " " .. Others) or "";
  3558. if is_set (Translators) then
  3559. Others = sepc .. ' 由' .. Translators .. '翻译 ' .. Others;
  3560. end
  3561.  
  3562. TitleNote = is_set(TitleNote) and (sepc .. " " .. TitleNote) or "";
  3563. if is_set (Edition) then
  3564. if Edition:match ('%f[%a][Ee]d%.?$') or Edition:match ('%f[%a][Ee]dition$') then
  3565. add_maint_cat ('extra_text', 'edition');
  3566. end
  3567. Edition = " " .. wrap_msg ('edition', Edition);
  3568. else
  3569. Edition = '';
  3570. end
  3571.  
  3572. Series = is_set(Series) and (sepc .. " " .. Series) or "";
  3573. OrigYear = is_set(OrigYear) and (" [" .. OrigYear .. "]") or "";
  3574. Agency = is_set(Agency) and (sepc .. " " .. Agency) or "";
  3575.  
  3576. Volume = format_volume_issue (Volume, Issue, config.CitationClass, Periodical_origin, sepc, use_lowercase);
  3577.  
  3578. ------------------------------------ totally unrelated data
  3579. if is_set(Via) then
  3580. Via = " " .. wrap_msg ('via', Via);
  3581. end
  3582.  
  3583. --[[
  3584. Subscription implies paywall; Registration does not. If both are used in a citation, the subscription required link
  3585. note is displayed. There are no error messages for this condition.
  3586.  
  3587. ]]
  3588. if is_set (SubscriptionRequired) then
  3589. SubscriptionRequired = sepc .. " " .. cfg.messages['subscription']; -- subscription required message
  3590. elseif is_set (RegistrationRequired) then
  3591. SubscriptionRequired = sepc .. " " .. cfg.messages['registration']; -- registration required message
  3592. else
  3593. SubscriptionRequired = ''; -- either or both might be set to something other than yes true y
  3594. end
  3595.  
  3596. if is_set(AccessDate) then
  3597. local retrv_text = " " .. cfg.messages['retrieved']
  3598.  
  3599. AccessDate = nowrap_date (AccessDate); -- wrap in nowrap span if date in appropriate format
  3600. if (sepc ~= ".") then retrv_text = retrv_text:lower() end -- if 'citation', lower case
  3601. AccessDate = substitute (retrv_text, AccessDate); -- add retrieved text
  3602. -- neither of these work; don't know why; it seems that substitute() isn't being called
  3603. AccessDate = substitute (cfg.presentation['accessdate'], {sepc, AccessDate}); -- allow editors to hide accessdates
  3604. end
  3605. if is_set(ID) then ID = sepc .." ".. ID; end
  3606. if "thesis" == config.CitationClass and is_set(Docket) then
  3607. ID = sepc .." Docket ".. Docket .. ID;
  3608. end
  3609. if "report" == config.CitationClass and is_set(Docket) then -- for cite report when |docket= is set
  3610. ID = sepc .. ' ' .. Docket; -- overwrite ID even if |id= is set
  3611. end
  3612.  
  3613. ID_list = build_id_list( ID_list, {DoiBroken = DoiBroken, ASINTLD = ASINTLD, IgnoreISBN = IgnoreISBN, Embargo=Embargo, Class = Class} );
  3614.  
  3615. if is_set(URL) then
  3616. URL = " " .. external_link( URL, nil, URLorigin );
  3617. end
  3618.  
  3619. if is_set(Quote) then
  3620. if Quote:sub(1,1) == '"' and Quote:sub(-1,-1) == '"' then -- if first and last characters of quote are quote marks
  3621. Quote = Quote:sub(2,-2); -- strip them off
  3622. end
  3623. Quote = sepc .." " .. wrap_style ('quoted-text', Quote ); -- wrap in <q>...</q> tags
  3624. PostScript = ""; -- cs1|2 does not supply terminal punctuation when |quote= is set
  3625. end
  3626. local Archived
  3627. if is_set(ArchiveURL) then
  3628. if not is_set(ArchiveDate) then
  3629. ArchiveDate = set_error('archive_missing_date');
  3630. end
  3631. if "no" == DeadURL then
  3632. local arch_text = cfg.messages['archived'];
  3633. if sepc ~= "." then arch_text = arch_text:lower() end
  3634. Archived = sepc .. " " .. substitute( cfg.messages['archived-not-dead'],
  3635. { external_link( ArchiveURL, arch_text, A:ORIGIN('ArchiveURL') ) .. ArchiveFormat, ArchiveDate } );
  3636. if not is_set(OriginalURL) then
  3637. Archived = Archived .. " " .. set_error('archive_missing_url');
  3638. end
  3639. elseif is_set(OriginalURL) then -- DeadURL is empty, 'yes', 'true', 'y', 'unfit', 'usurped'
  3640. local arch_text = cfg.messages['archived-dead'];
  3641. if sepc ~= "." then arch_text = arch_text:lower() end
  3642. if in_array (DeadURL, {'unfit', 'usurped'}) then
  3643. Archived = sepc .. " " .. 'Archived from the original on ' .. ArchiveDate; -- format already styled
  3644. else -- DeadURL is empty, 'yes', 'true', or 'y'
  3645. Archived = sepc .. " " .. substitute( arch_text,
  3646. { external_link( OriginalURL, cfg.messages['original'], OriginalURLorigin ) .. OriginalFormat, ArchiveDate } ); -- format already styled
  3647. end
  3648. else
  3649. local arch_text = cfg.messages['archived-missing'];
  3650. if sepc ~= "." then arch_text = arch_text:lower() end
  3651. Archived = sepc .. " " .. substitute( arch_text,
  3652. { set_error('archive_missing_url'), ArchiveDate } );
  3653. end
  3654. elseif is_set (ArchiveFormat) then
  3655. Archived = ArchiveFormat; -- if set and ArchiveURL not set ArchiveFormat has error message
  3656. else
  3657. Archived = ""
  3658. end
  3659. local Lay = '';
  3660. if is_set(LayURL) then
  3661. if is_set(LayDate) then LayDate = " (" .. LayDate .. ")" end
  3662. if is_set(LaySource) then
  3663. LaySource = " &ndash; ''" .. safe_for_italics(LaySource) .. "''";
  3664. else
  3665. LaySource = "";
  3666. end
  3667. if sepc == '.' then
  3668. Lay = sepc .. " " .. external_link( LayURL, cfg.messages['lay summary'], A:ORIGIN('LayURL') ) .. LayFormat .. LaySource .. LayDate
  3669. else
  3670. Lay = sepc .. " " .. external_link( LayURL, cfg.messages['lay summary']:lower(), A:ORIGIN('LayURL') ) .. LayFormat .. LaySource .. LayDate
  3671. end
  3672. elseif is_set (LayFormat) then -- Test if |lay-format= is given without giving a |lay-url=
  3673. Lay = sepc .. LayFormat; -- if set and LayURL not set, then LayFormat has error message
  3674. end
  3675.  
  3676. if is_set(Transcript) then
  3677. if is_set(TranscriptURL) then
  3678. Transcript = external_link( TranscriptURL, Transcript, TranscriptURLorigin );
  3679. end
  3680. Transcript = sepc .. ' ' .. Transcript .. TranscriptFormat;
  3681. elseif is_set(TranscriptURL) then
  3682. Transcript = external_link( TranscriptURL, nil, TranscriptURLorigin );
  3683. end
  3684.  
  3685. local Publisher;
  3686. if is_set(Periodical) and
  3687. not in_array(config.CitationClass, {"encyclopaedia","web","pressrelease","podcast"}) then
  3688. if is_set(PublisherName) then
  3689. if is_set(PublicationPlace) then
  3690. Publisher = PublicationPlace .. ": " .. PublisherName;
  3691. else
  3692. Publisher = PublisherName;
  3693. end
  3694. elseif is_set(PublicationPlace) then
  3695. Publisher= PublicationPlace;
  3696. else
  3697. Publisher = "";
  3698. end
  3699. if is_set(Publisher) then
  3700. Publisher = " (" .. Publisher .. ")";
  3701. end
  3702. else
  3703. if is_set(PublisherName) then
  3704. if is_set(PublicationPlace) then
  3705. Publisher = sepc .. " " .. PublicationPlace .. ": " .. PublisherName;
  3706. else
  3707. Publisher = sepc .. " " .. PublisherName;
  3708. end
  3709. elseif is_set(PublicationPlace) then
  3710. Publisher= sepc .. " " .. PublicationPlace;
  3711. else
  3712. Publisher = '';
  3713. end
  3714. end
  3715. -- Several of the above rely upon detecting this as nil, so do it last.
  3716. if is_set(Periodical) then
  3717. if is_set(Title) or is_set(TitleNote) then
  3718. Periodical = sepc .. " " .. wrap_style ('italic-title', Periodical)
  3719. else
  3720. Periodical = wrap_style ('italic-title', Periodical)
  3721. end
  3722. end
  3723.  
  3724. --[[
  3725. Handle the oddity that is cite speech. This code overrides whatever may be the value assigned to TitleNote (through |department=) and forces it to be " (Speech)" so that
  3726. the annotation directly follows the |title= parameter value in the citation rather than the |event= parameter value (if provided).
  3727. ]]
  3728. if "speech" == config.CitationClass then -- cite speech only
  3729. TitleNote = " (Speech)"; -- annotate the citation
  3730. if is_set (Periodical) then -- if Periodical, perhaps because of an included |website= or |journal= parameter
  3731. if is_set (Conference) then -- and if |event= is set
  3732. Conference = Conference .. sepc .. " "; -- then add appropriate punctuation to the end of the Conference variable before rendering
  3733. end
  3734. end
  3735. end
  3736.  
  3737. -- Piece all bits together at last. Here, all should be non-nil.
  3738. -- We build things this way because it is more efficient in Lua
  3739. -- not to keep reassigning to the same string variable over and over.
  3740.  
  3741. local tcommon;
  3742. local tcommon2; -- used for book cite when |contributor= is set
  3743. if in_array(config.CitationClass, {"journal","citation"}) and is_set(Periodical) then
  3744. if is_set(Others) then Others = Others .. sepc .. " " end
  3745. tcommon = safe_join( {Others, Title, TitleNote, Conference, Periodical, Format, TitleType, Series,
  3746. Edition, Publisher, Agency}, sepc );
  3747. elseif in_array(config.CitationClass, {"book","citation"}) and not is_set(Periodical) then -- special cases for book cites
  3748. if is_set (Contributors) then -- when we are citing foreword, preface, introduction, etc
  3749. tcommon = safe_join( {Title, TitleNote}, sepc ); -- author and other stuff will come after this and before tcommon2
  3750. tcommon2 = safe_join( {Conference, Periodical, Format, TitleType, Series, Volume, Others, Edition, Publisher, Agency}, sepc );
  3751. else
  3752. tcommon = safe_join( {Title, TitleNote, Conference, Periodical, Format, TitleType, Series, Volume, Others, Edition, Publisher, Agency}, sepc );
  3753. end
  3754.  
  3755. elseif 'map' == config.CitationClass then -- special cases for cite map
  3756. if is_set (Chapter) then -- map in a book; TitleType is part of Chapter
  3757. tcommon = safe_join( {Title, Format, Edition, Scale, Series, Cartography, Others, Publisher, Volume}, sepc );
  3758. elseif is_set (Periodical) then -- map in a periodical
  3759. tcommon = safe_join( {Title, TitleType, Format, Periodical, Scale, Series, Cartography, Others, Publisher, Volume}, sepc );
  3760. else -- a sheet or stand-alone map
  3761. tcommon = safe_join( {Title, TitleType, Format, Edition, Scale, Series, Cartography, Others, Publisher}, sepc );
  3762. end
  3763. elseif 'episode' == config.CitationClass then -- special case for cite episode
  3764. tcommon = safe_join( {Title, TitleNote, TitleType, Series, Transcript, Edition, Publisher}, sepc );
  3765. else -- all other CS1 templates
  3766. tcommon = safe_join( {Title, TitleNote, Conference, Periodical, Format, TitleType, Series,
  3767. Volume, Others, Edition, Publisher, Agency}, sepc );
  3768. end
  3769. if #ID_list > 0 then
  3770. ID_list = safe_join( { sepc .. " ", table.concat( ID_list, sepc .. " " ), ID }, sepc );
  3771. else
  3772. ID_list = ID;
  3773. end
  3774. -- LOCAL
  3775. local xDate = Date
  3776. local pgtext = Position .. Sheet .. Sheets .. Page .. Pages .. At;
  3777. if ( is_set(Periodical) and Date ~= '' and
  3778. not in_array(config.CitationClass, {"encyclopaedia","web"}) )
  3779. or ( in_array(config.CitationClass, {"book","news"}) ) then
  3780. if in_array(config.CitationClass, {"journal","citation"}) and ( Volume ~= '' or Issue ~= '' ) then
  3781. xDate = xDate .. ',' .. Volume
  3782. end
  3783. xDate = xDate .. pgtext
  3784. pgtext = ''
  3785. end
  3786. if PublicationDate and PublicationDate ~= '' then
  3787. xDate = xDate .. ' (' .. PublicationDate .. ')'
  3788. end
  3789. if OrigYear ~= '' then
  3790. xDate = xDate .. OrigYear
  3791. end
  3792. if AccessDate ~= '' then
  3793. xDate = xDate .. ' ' .. AccessDate
  3794. end
  3795. if xDate ~= '' then
  3796. xDate = sepc .. ' ' .. xDate
  3797. end
  3798. -- END LOCAL
  3799. local idcommon = safe_join( { URL, xDate, ID_list, Archived, Via, SubscriptionRequired, Lay, Language, Quote }, sepc );
  3800. local text;
  3801.  
  3802. if is_set(Authors) then
  3803. if is_set(Coauthors) then
  3804. if 'vanc' == NameListFormat then -- separate authors and coauthors with proper name-list-separator
  3805. Authors = Authors .. ', ' .. Coauthors;
  3806. else
  3807. Authors = Authors .. '; ' .. Coauthors;
  3808. end
  3809. end
  3810. Authors = terminate_name_list (Authors, sepc); -- when no date, terminate with 0 or 1 sepc and a space
  3811. if is_set(Editors) then
  3812. local in_text = " ";
  3813. local post_text = "";
  3814. if is_set(Chapter) and 0 == #c then
  3815. in_text = in_text .. cfg.messages['in'] .. " "
  3816. if (sepc ~= '.') then in_text = in_text:lower() end -- lowercase for cs2
  3817. else
  3818. if EditorCount <= 1 then
  3819. post_text = ", " .. cfg.messages['editor'];
  3820. else
  3821. post_text = ", " .. cfg.messages['editors'];
  3822. end
  3823. end
  3824. Editors = terminate_name_list (in_text .. Editors .. post_text, sepc); -- terminate with 0 or 1 sepc and a space
  3825. end
  3826. if is_set (Contributors) then -- book cite and we're citing the intro, preface, etc
  3827. local by_text = sepc .. ' ' .. cfg.messages['by'] .. ' ';
  3828. if (sepc ~= '.') then by_text = by_text:lower() end -- lowercase for cs2
  3829. Authors = by_text .. Authors; -- author follows title so tweak it here
  3830. if is_set (Editors) then -- when Editors make sure that Authors gets terminated
  3831. Authors = terminate_name_list (Authors, sepc); -- terminate with 0 or 1 sepc and a space
  3832. end
  3833. Contributors = terminate_name_list (Contributors, sepc); -- terminate with 0 or 1 sepc and a space
  3834. text = safe_join( {Contributors, Chapter, tcommon, Authors, Place, Editors, tcommon2, pgtext, idcommon }, sepc );
  3835. else
  3836. text = safe_join( {Authors, Chapter, Place, Editors, tcommon, pgtext, idcommon }, sepc );
  3837. end
  3838. elseif is_set(Editors) then
  3839. if EditorCount <= 1 then
  3840. Editors = Editors .. " (" .. cfg.messages['editor'] .. ")" .. sepc .. " "
  3841. else
  3842. Editors = Editors .. " (" .. cfg.messages['editors'] .. ")" .. sepc .. " "
  3843. end
  3844. text = safe_join( {Editors, Chapter, Place, tcommon, pgtext, idcommon}, sepc );
  3845. else
  3846. if config.CitationClass=="journal" and is_set(Periodical) then
  3847. text = safe_join( {Chapter, Place, tcommon, pgtext, idcommon}, sepc );
  3848. else
  3849. text = safe_join( {Chapter, Place, tcommon, pgtext, idcommon}, sepc );
  3850. end
  3851. end
  3852. if is_set(PostScript) and PostScript ~= sepc then
  3853. text = safe_join( {text, sepc}, sepc ); --Deals with italics, spaces, etc.
  3854. text = text:sub(1,-sepc:len()-1);
  3855. end
  3856. text = safe_join( {text, PostScript}, sepc );
  3857.  
  3858. -- Now enclose the whole thing in a <cite/> element
  3859. local options = {};
  3860. if is_set(config.CitationClass) and config.CitationClass ~= "citation" then
  3861. options.class = config.CitationClass;
  3862. options.class = "citation " .. config.CitationClass; -- class=citation required for blue highlight when used with |ref=
  3863. else
  3864. options.class = "citation";
  3865. end
  3866. if is_set(Ref) and Ref:lower() ~= "none" then -- set reference anchor if appropriate
  3867. local id = Ref
  3868. if ('harv' == Ref ) then
  3869. local namelist = {}; -- holds selected contributor, author, editor name list
  3870. -- local year = first_set (Year, anchor_year); -- Year first for legacy citations and for YMD dates that require disambiguation
  3871. local year = first_set ({Year, anchor_year}, 2); -- Year first for legacy citations and for YMD dates that require disambiguation
  3872.  
  3873. if #c > 0 then -- if there is a contributor list
  3874. namelist = c; -- select it
  3875. elseif #a > 0 then -- or an author list
  3876. namelist = a;
  3877. elseif #e > 0 then -- or an editor list
  3878. namelist = e;
  3879. end
  3880. id = anchor_id (namelist, year); -- go make the CITEREF anchor
  3881. end
  3882. options.id = id;
  3883. end
  3884. if string.len(text:gsub("<span[^>/]*>.-</span>", ""):gsub("%b<>","")) <= 2 then
  3885. z.error_categories = {};
  3886. text = set_error('empty_citation');
  3887. z.message_tail = {};
  3888. end
  3889. if is_set(options.id) then
  3890. text = '<cite id="' .. mw.uri.anchorEncode(options.id) ..'" class="' .. mw.text.nowiki(options.class) .. '">' .. text .. "</cite>";
  3891. else
  3892. text = '<cite class="' .. mw.text.nowiki(options.class) .. '">' .. text .. "</cite>";
  3893. end
  3894.  
  3895. local empty_span = '<span style="display:none;">&nbsp;</span>';
  3896. -- Note: Using display: none on the COinS span breaks some clients.
  3897. local OCinS = '<span title="' .. OCinSoutput .. '" class="Z3988">' .. empty_span .. '</span>';
  3898. text = text .. OCinS;
  3899. if #z.message_tail ~= 0 then
  3900. text = text .. " ";
  3901. for i,v in ipairs( z.message_tail ) do
  3902. if is_set(v[1]) then
  3903. if i == #z.message_tail then
  3904. text = text .. error_comment( v[1], v[2] );
  3905. else
  3906. text = text .. error_comment( v[1] .. "; ", v[2] );
  3907. end
  3908. end
  3909. end
  3910. end
  3911.  
  3912. if #z.maintenance_cats ~= 0 then
  3913. text = text .. '<span class="citation-comment" style="display:none; color:#33aa33">';
  3914. for _, v in ipairs( z.maintenance_cats ) do -- append maintenance categories
  3915. text = text .. ' ' .. v .. ' ([[:Category:' .. v ..'|link]])';
  3916. end
  3917. text = text .. '</span>'; -- maintenance mesages (realy just the names of the categories for now)
  3918. end
  3919. no_tracking_cats = no_tracking_cats:lower();
  3920. if in_array(no_tracking_cats, {"", "no", "false", "n"}) then
  3921. for _, v in ipairs( z.error_categories ) do
  3922. text = text .. '[[Category:' .. v ..']]';
  3923. end
  3924. for _, v in ipairs( z.maintenance_cats ) do -- append maintenance categories
  3925. text = text .. '[[Category:' .. v ..']]';
  3926. end
  3927. end
  3928. return text
  3929. end
  3930.  
  3931. --[[--------------------------< H A S _ I N V I S I B L E _ C H A R S >----------------------------------------
  3932.  
  3933. This function searches a parameter's value for nonprintable or invisible characters. The search stops at the first match.
  3934.  
  3935. Sometime after this module is done with rendering a citation, some C0 control characters are replaced with the
  3936. replacement character. That replacement character is not detected by this test though it is visible to readers
  3937. of the rendered citation. This function will detect the replacement character when it is part of the wikisource.
  3938.  
  3939. Output of this function is an error message that identifies the character or the Unicode group that the character
  3940. belongs to along with its position in the parameter value.
  3941.  
  3942. ]]
  3943. --[[
  3944. local function has_invisible_chars (param, v)
  3945. local position = '';
  3946. local i=1;
  3947.  
  3948. while cfg.invisible_chars[i] do
  3949. local char=cfg.invisible_chars[i][1] -- the character or group name
  3950. local pattern=cfg.invisible_chars[i][2] -- the pattern used to find it
  3951. v = mw.text.unstripNoWiki( v ); -- remove nowiki stripmarkers
  3952. position = mw.ustring.find (v, pattern) -- see if the parameter value contains characters that match the pattern
  3953. if position then
  3954. table.insert( z.message_tail, { set_error( 'invisible_char', {char, wrap_style ('parameter', param), position}, true ) } ); -- add error message
  3955. return; -- and done with this parameter
  3956. end
  3957. i=i+1; -- bump our index
  3958. end
  3959. end
  3960. ]]
  3961.  
  3962. --[[--------------------------< Z . C I T A T I O N >----------------------------------------------------------
  3963.  
  3964. This is used by templates such as {{cite book}} to create the actual citation text.
  3965.  
  3966. ]]
  3967.  
  function z.citation(frame)
    --[[
    Entry point reached through {{#invoke:}} from templates such as {{cite book}}.

    frame   - the frame object of the #invoke call. Every entry of frame.args is copied into both the
              config table and the args table. The args of the parent frame (frame:getParent()) are
              then checked with validate() and merged into args.
    returns - the result of citation0 (config, args).

    Side effects: assigns the module-level upvalues cfg, whitelist, dates and year_date_check, and
    appends {error_text, error_state} entries to z.message_tail for parameters that fail validation.
    ]]
    local pframe = frame:getParent()

    -- Work out once whether the {{#invoke:}} named a sandbox copy of this module. The same answer
    -- selects the Configuration, Whitelist, Date_validation and (lazily, below) Suggestions pages,
    -- so keeping it in one place stops the four module names from drifting apart.
    local subpage = '';
    if nil ~= string.find (frame:getTitle(), 'sandbox', 1, true) then -- plain-text search, no pattern magic
      subpage = '/sandbox';
    end

    cfg = mw.loadData ('Module:Citation/CS1/Configuration' .. subpage); -- load matching versions of Configuration and Whitelist and ...
    whitelist = mw.loadData ('Module:Citation/CS1/Whitelist' .. subpage);
    local validation = require ('Module:Citation/CS1/Date_validation' .. subpage); -- ... of the date validation code

    dates = validation.dates; -- imported functions
    year_date_check = validation.year_date_check;

    local args = {};
    local suggestions = {}; -- stays an empty table until an unknown parameter forces the Suggestions page to be loaded
    local error_text, error_state;

    local config = {};
    for k, v in pairs( frame.args ) do
      config[k] = v;
      args[k] = v;
    end

    local capture; -- the single supported capture when matching unknown parameters using patterns
    for k, v in pairs( pframe.args ) do
      if v ~= '' then
        if not validate( k ) then
          error_text = "";
          if type( k ) ~= 'string' then
            -- Exclude empty numbered parameters
            if v:match("%S+") ~= nil then
              error_text, error_state = set_error( 'text_ignored', {v}, true );
            end
          elseif validate( k:lower() ) then -- name is only wrong in its capitalization
            error_text, error_state = set_error( 'parameter_ignored_suggest', {k, k:lower()}, true );
          else
            if nil == suggestions.suggestions then -- if this table is nil then we need to load it
              suggestions = mw.loadData( 'Module:Citation/CS1/Suggestions' .. subpage ); -- sandbox or live, same choice as above
            end
            for pattern, param in pairs (suggestions.patterns) do -- loop through the patterns to see if we can suggest a proper parameter
              capture = k:match (pattern); -- the whole match if no capture in pattern else the capture if a match
              if capture then -- if the pattern matches
                param = substitute( param, capture ); -- add the capture to the suggested parameter (typically the enumerator)
                error_text, error_state = set_error( 'parameter_ignored_suggest', {k, param}, true ); -- set the error message
              end
            end
            if not is_set (error_text) then -- couldn't match with a pattern, is there an explicit suggestion?
              if suggestions.suggestions[ k:lower() ] ~= nil then
                error_text, error_state = set_error( 'parameter_ignored_suggest', {k, suggestions.suggestions[ k:lower() ]}, true );
              else
                error_text, error_state = set_error( 'parameter_ignored', {k}, true );
              end
            end
          end
          if error_text ~= '' then
            table.insert( z.message_tail, {error_text, error_state} );
          end
        end
        args[k] = v; -- the value is kept even when the name failed validation
      elseif args[k] ~= nil or (k == 'postscript') then
        -- An empty value only overrides a key that frame.args already supplied, or |postscript=.
        args[k] = v;
      end
    end

    -- The definition of has_invisible_chars() that sits directly above this function is commented out.
    -- Calling it unconditionally raises "attempt to call a nil value" for any citation that has a named
    -- argument, so the scan runs only when the function is really available. If it is defined elsewhere
    -- in the file the behaviour is exactly as before.
    if 'function' == type (has_invisible_chars) then
      for k, v in pairs( args ) do
        if 'string' == type (k) then -- don't evaluate positional parameters
          has_invisible_chars (k, v);
        end
      end
    end
    return citation0( config, args)
  end
  4047.  
  4048. return z -- hand the module table (with z.citation as its entry point) back to whatever loaded this module