sync from sandbox;
細 (1版) |
bsd>Trappist the monk (sync from sandbox;) |
||
1行目: | 1行目: | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | |||
]] | |||
local add_prop_cat, is_set, in_array, wrap_style; -- imported functions from selected Module:Citation/CS1/Utilities | |||
local cfg; -- table of tables imported from selected Module:Citation/CS1/Configuration | |||
--[[--------------------------< F I L E - S C O P E D E C L A R A T I O N S >-------------------------------- | |||
File-scope variables are declared here | |||
]] | ]] | ||
local | local lang_object = mw.getContentLanguage(); -- used by is_valid_accessdate(), is_valid_year(), date_name_xlate(); TODO: move to ~/Configuration? | ||
local | local year_limit; -- used by is_valid_year() | ||
--[=[-------------------------< I S _ V A L I D _ A C C E S S D A T E >---------------------------------------- | --[=[-------------------------< I S _ V A L I D _ A C C E S S D A T E >---------------------------------------- | ||
15行目: | 23行目: | ||
Wikipedia start date is 2001-01-15T00:00:00 UTC which is 979516800 seconds after 1970-01-01T00:00:00 UTC (the start of Unix time) | Wikipedia start date is 2001-01-15T00:00:00 UTC which is 979516800 seconds after 1970-01-01T00:00:00 UTC (the start of Unix time) | ||
accessdate is the date provided in | | accessdate is the date provided in |access-date= at time 00:00:00 UTC | ||
today is the current date at time 00:00:00 UTC plus 48 hours | today is the current date at time 00:00:00 UTC plus 48 hours | ||
if today is 2015-01-01T00:00:00 then | if today is 2015-01-01T00:00:00 then | ||
22行目: | 30行目: | ||
This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser | This function does not work if it is fed month names for languages other than English. Wikimedia #time: parser | ||
apparently doesn't understand non- | apparently doesn't understand non-English date month names. This function will always return false when the date | ||
contains a non-English month name because good1 is false after the call to lang.formatDate(). To get around that | contains a non-English month name because good1 is false after the call to lang.formatDate(). To get around that | ||
call this function with YYYY-MM-DD format dates. | call this function with YYYY-MM-DD format dates. | ||
29行目: | 37行目: | ||
local function is_valid_accessdate (accessdate) | local function is_valid_accessdate (accessdate) | ||
local good1, good2; | local good1, good2; | ||
local access_ts, tomorrow_ts; -- to hold | local access_ts, tomorrow_ts; -- to hold Unix time stamps representing the dates | ||
good1, access_ts = pcall( | good1, access_ts = pcall (lang_object.formatDate, lang_object, 'U', accessdate ); -- convert accessdate value to Unix timestamp | ||
good2, tomorrow_ts = pcall( | good2, tomorrow_ts = pcall (lang_object.formatDate, lang_object, 'U', 'today + 2 days' ); -- today midnight + 2 days is one second more than all day tomorrow | ||
if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which which tonumber() may not understand | if good1 and good2 then -- lang.formatDate() returns a timestamp in the local script which which tonumber() may not understand | ||
access_ts = tonumber (access_ts) or | access_ts = tonumber (access_ts) or lang_object:parseFormattedNumber (access_ts); -- convert to numbers for the comparison; | ||
tomorrow_ts = tonumber (tomorrow_ts) or | tomorrow_ts = tonumber (tomorrow_ts) or lang_object:parseFormattedNumber (tomorrow_ts); | ||
else | else | ||
return false; -- one or both failed to convert to | return false; -- one or both failed to convert to Unix time stamp | ||
end | end | ||
48行目: | 55行目: | ||
return false; -- accessdate out of range | return false; -- accessdate out of range | ||
end | end | ||
end | end | ||
68行目: | 62行目: | ||
returns true and date value if that value has proper dmy, mdy, ymd format. | returns true and date value if that value has proper dmy, mdy, ymd format. | ||
returns false and 9999 (embargoed forever) when date value is not proper format; assumes that when |embargo= is | returns false and 9999 (embargoed forever) when date value is not proper format; assumes that when |pmc-embargo-date= is | ||
set, the editor intended to embargo a | set, the editor intended to embargo a PMC but |pmc-embargo-date= does not hold a single date. | ||
]] | ]] | ||
80行目: | 74行目: | ||
end | end | ||
return false, '9999'; -- if here not good date so return false and set embargo date to long time in future | return false, '9999'; -- if here not good date so return false and set embargo date to long time in future | ||
end | |||
--[[--------------------------< G E T _ M O N T H _ N U M B E R >---------------------------------------------- | |||
returns a number according to the month in a date: 1 for January, etc. Capitalization and spelling must be correct. | |||
If not a valid month, returns 0 | |||
]] | |||
local function get_month_number (month) | |||
return cfg.date_names['local'].long[month] or cfg.date_names['local'].short[month] or -- look for local names first | |||
cfg.date_names['en'].long[month] or cfg.date_names['en'].short[month] or -- failing that, look for English names | |||
0; -- not a recognized month name | |||
end | end | ||
85行目: | 93行目: | ||
--[[--------------------------< G E T _ S E A S O N _ N U M B E R >-------------------------------------------- | --[[--------------------------< G E T _ S E A S O N _ N U M B E R >-------------------------------------------- | ||
returns a number according to the sequence of seasons in a year: | returns a number according to the sequence of seasons in a year: 21 for Spring, etc. Capitalization and spelling | ||
must be correct. If not a valid season, returns 0. | |||
21-24 = Spring, Summer, Autumn, Winter, independent of “Hemisphere” | |||
returns 0 when <param> is not |date= | |||
Season numbering is defined by Extended Date/Time Format (EDTF) specification (https://www.loc.gov/standards/datetime/) | |||
which became part of ISO 8601 in 2019. See '§Sub-year groupings'. The standard defines various divisions using | |||
numbers 21-41. cs1|2 only supports generic seasons. EDTF does support the distinction between north and south | |||
hemispere seasons but cs1|2 has no way to make that distinction. | |||
These additional divisions not currently supported: | These additional divisions not currently supported: | ||
96行目: | 110行目: | ||
37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each) | 37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each) | ||
40-41 = Semestral 1, Semestral-2 (6 months each) | 40-41 = Semestral 1, Semestral-2 (6 months each) | ||
]] | ]] | ||
local function get_season_number (season) | local function get_season_number (season, param) | ||
if 'date' ~= param then | |||
return 0; -- season dates only supported by |date= | |||
end | |||
return cfg.date_names['local'].season[season] or -- look for local names first | return cfg.date_names['local'].season[season] or -- look for local names first | ||
cfg.date_names['en'].season[season] or -- failing that, look for English names | cfg.date_names['en'].season[season] or -- failing that, look for English names | ||
106行目: | 122行目: | ||
end | end | ||
returns a non-zero number if date contains a recognized proper name. Capitalization and spelling must be correct. | --[[--------------------------< G E T _ Q U A R T E R _ N U M B E R >------------------------------------------ | ||
returns a number according to the sequence of quarters in a year: 33 for first quarter, etc. Capitalization and spelling | |||
must be correct. If not a valid quarter, returns 0. | |||
33-36 = Quarter 1, Quarter 2, Quarter 3, Quarter 4 (3 months each) | |||
returns 0 when <param> is not |date= | |||
Quarter numbering is defined by Extended Date/Time Format (EDTF) specification (https://www.loc.gov/standards/datetime/) | |||
which became part of ISO 8601 in 2019. See '§Sub-year groupings'. The standard defines various divisions using | |||
numbers 21-41. cs1|2 only supports generic seasons and quarters. | |||
These additional divisions not currently supported: | |||
37-39 = Quadrimester 1, Quadrimester 2, Quadrimester 3 (4 months each) | |||
40-41 = Semestral 1, Semestral-2 (6 months each) | |||
]] | |||
local function get_quarter_number (quarter, param) | |||
if 'date' ~= param then | |||
return 0; -- quarter dates only supported by |date= | |||
end | |||
quarter = mw.ustring.gsub (quarter, ' +', ' '); -- special case replace multiple space chars with a single space char | |||
return cfg.date_names['local'].quarter[quarter] or -- look for local names first | |||
cfg.date_names['en'].quarter[quarter] or -- failing that, look for English names | |||
0; -- not a recognized quarter name | |||
end | |||
--[[--------------------------< G E T _ P R O P E R _ N A M E _ N U M B E R >---------------------------------- | |||
returns a non-zero number if date contains a recognized proper-name. Capitalization and spelling must be correct. | |||
returns 0 when <param> is not |date= | |||
]] | ]] | ||
local function | local function get_proper_name_number (name, param) | ||
if 'date' ~= param then | |||
return 0; -- proper-name dates only supported by |date= | |||
end | |||
return cfg.date_names['local'].named[name] or -- look for local names dates first | return cfg.date_names['local'].named[name] or -- look for local names dates first | ||
cfg.date_names['en'].named[name] or -- failing that, look for English names | cfg.date_names['en'].named[name] or -- failing that, look for English names | ||
118行目: | 169行目: | ||
end | end | ||
--returns true if month or season is valid (properly spelled, capitalized, abbreviated) | --[[--------------------------< G E T _ E L E M E N T _ N U M B E R <------------------------------------------ | ||
returns true if month or season or quarter or proper name is valid (properly spelled, capitalized, abbreviated) | |||
]] | ]] | ||
local function | local function get_element_number (element, param) | ||
local num; | |||
if 0 = | |||
return | local funcs = {get_month_number, get_season_number, get_quarter_number, get_proper_name_number}; -- list of functions to execute in order | ||
for _, func in ipairs (funcs) do -- spin through the function list | |||
num = func (element, param); -- call the function and get the returned number | |||
if 0 ~= num then -- non-zero when valid month season quarter | |||
return num; -- return that number | |||
end | end | ||
end | end | ||
return | return nil; -- not valid | ||
end | end | ||
--[[--------------------------< I S _ V A L I D _ Y E A R >---------------------------------------------------- | --[[--------------------------< I S _ V A L I D _ Y E A R >---------------------------------------------------- | ||
Function gets current year from the server and compares it to year from a citation parameter. Years more than one year in the future are not acceptable. | Function gets current year from the server and compares it to year from a citation parameter. Years more than one | ||
year in the future are not acceptable. | |||
]] | ]] | ||
local function is_valid_year(year) | local function is_valid_year (year) | ||
if not is_set(year_limit) then | if not is_set(year_limit) then | ||
year_limit = tonumber(os.date("%Y"))+1; | year_limit = tonumber(os.date("%Y"))+1; -- global variable so we only have to fetch it once | ||
end | end | ||
year = tonumber (year) or lang_object:parseFormattedNumber (year); -- convert to numbers for the comparison; | |||
return year and (year <= year_limit) or false; | |||
end | end | ||
--[[--------------------------< I S _ V A L I D _ D A T E >---------------------------------------------------- | --[[--------------------------< I S _ V A L I D _ D A T E >---------------------------------------------------- | ||
Returns true if day is less than or equal to the number of days in month and year is no farther into the future | Returns true if day is less than or equal to the number of days in month and year is no farther into the future | ||
than next year; else returns false. | than next year; else returns false. | ||
165行目: | 228行目: | ||
month = tonumber(month); -- required for YYYY-MM-DD dates | month = tonumber(month); -- required for YYYY-MM-DD dates | ||
if (2==month) then -- if February | if (2 == month) then -- if February | ||
month_length = 28; -- then 28 days unless | month_length = 28; -- then 28 days unless | ||
if 1582 > tonumber(year) then -- Julian calendar | if 1582 > tonumber(year) then -- Julian calendar | ||
if 0==(year%4) then | if 0 == (year%4) then -- is a leap year? | ||
month_length = 29; | month_length = 29; -- if leap year then 29 days in February | ||
end | end | ||
else -- Gregorian calendar | else -- Gregorian calendar | ||
if (0==(year%4) and (0~=(year%100) or 0==(year%400))) then -- is a leap year? | if (0 == (year%4) and (0 ~= (year%100) or 0 == (year%400))) then -- is a leap year? | ||
month_length = 29; -- if leap year then 29 days in February | month_length = 29; -- if leap year then 29 days in February | ||
end | end | ||
end | end | ||
else | else | ||
month_length=days_in_month[month]; | month_length = days_in_month[month]; | ||
end | end | ||
185行目: | 248行目: | ||
return true; | return true; | ||
end | end | ||
--[[--------------------------< I S _ V A L I D _ M O N T H _ R A N G E _ S T Y L E >-------------------------- | --[[--------------------------< I S _ V A L I D _ M O N T H _ R A N G E _ S T Y L E >-------------------------- | ||
200行目: | 264行目: | ||
if len1 == len2 then | if len1 == len2 then | ||
return true; -- both months are short form so return true | return true; -- both months are short form so return true | ||
elseif 'May' == month1 or 'May'== month2 then | elseif 'May' == month1 or 'May'== month2 then -- ToDo: I18N | ||
return true; -- both months are long form so return true | return true; -- both months are long form so return true | ||
elseif 3 == len1 or 3 == len2 then | elseif 3 == len1 or 3 == len2 then | ||
216行目: | 280行目: | ||
Month pairs are expected to be left to right, earliest to latest in time. | Month pairs are expected to be left to right, earliest to latest in time. | ||
All season ranges are accepted as valid because there are publishers out there who have published a Summer–Spring YYYY issue | All season ranges are accepted as valid because there are publishers out there who have published a Summer–Spring YYYY issue, hence treat as ok | ||
]] | ]] | ||
local function is_valid_month_season_range(range_start, range_end) | local function is_valid_month_season_range(range_start, range_end, param) | ||
local range_start_number = get_month_number (range_start); | local range_start_number = get_month_number (range_start); | ||
local range_end_number; | local range_end_number; | ||
if 0 == range_start_number then -- is this a month range? | if 0 == range_start_number then -- is this a month range? | ||
range_start_number = get_season_number (range_start); | range_start_number = get_season_number (range_start, param); -- not a month; is it a season? get start season number | ||
range_end_number = get_season_number (range_end); | range_end_number = get_season_number (range_end, param); -- get end season number | ||
if (0 ~= range_start_number) and (0 ~= range_end_number) then | if (0 ~= range_start_number) and (0 ~= range_end_number) and (range_start_number ~= range_end_number) then | ||
return true; -- any season pairing is accepted | return true; -- any season pairing is accepted except when both are the same | ||
end | end | ||
return false; -- range_start and/or range_end is not a season | return false; -- range_start and/or range_end is not a season | ||
267行目: | 331行目: | ||
the output table receives: | the output table receives: | ||
rftdate: an | rftdate: an ISO 8601 formatted date | ||
rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and | rftchron: a free-form version of the date, usually without year which is in rftdate (season ranges and proper-name dates) | ||
rftssn: one of four season keywords: winter, spring, summer, fall (lowercase) | rftssn: one of four season keywords: winter, spring, summer, fall (lowercase) | ||
rftquarter: one of four values: 1, 2, 3, 4 | |||
]] | ]] | ||
281行目: | 346行目: | ||
local day = tonumber (input.day); | local day = tonumber (input.day); | ||
if (0 ~= day) and -- day must have a value for this to be a whole date | if (0 ~= day) and -- day must have a value for this to be a whole date | ||
(((1582 == year) and (10 <= month) and (12 >= month)) or -- any whole 1582 date from 1 | (((1582 == year) and (10 <= month) and (12 >= month)) or -- any whole 1582 date from 1 October to 31 December or | ||
((1926 == year) and (1 == month) and (1 == input.day)) or -- 1 January 1926 or | ((1926 == year) and (1 == month) and (1 == input.day)) or -- 1 January 1926 or | ||
((1582 < year) and (1925 >= year))) then -- any date 1 January 1583 – 31 December 1925 | ((1582 < year) and (1925 >= year))) then -- any date 1 January 1583 – 31 December 1925 | ||
tCOinS_date.inter_cal_cat = true; -- set category flag true | tCOinS_date.inter_cal_cat = true; -- set category flag true | ||
end | end | ||
-- end temporary Julian / | -- end temporary Julian / Gregorian calendar uncertainty detection | ||
if 1582 > tonumber(input.year) or 20 < tonumber(input.month) then -- Julian calendar or season so &rft.date gets year only | if 1582 > tonumber(input.year) or 20 < tonumber(input.month) then -- Julian calendar or season so &rft.date gets year only | ||
293行目: | 358行目: | ||
date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range | date = string.format ('%.4d/%.4d', tonumber(input.year), tonumber(input.year2)) -- assemble the date range | ||
end | end | ||
if 20 < tonumber(input.month) then -- if season or | if 20 < tonumber(input.month) then -- if season or proper-name date | ||
local season = {[24]='winter', [21]='spring', [22]='summer', [23]='fall', [99]='Christmas'}; -- seasons lowercase, no autumn; proper names use title case | local season = {[24] = 'winter', [21] = 'spring', [22] = 'summer', [23] = 'fall', [33] = '1', [34] = '2', [35] = '3', [36] = '4', [98] = 'Easter', [99] = 'Christmas'}; -- seasons lowercase, no autumn; proper-names use title case | ||
if 0 == input.month2 then -- single season date | if 0 == input.month2 then -- single season date | ||
if | if 40 < tonumber(input.month) then | ||
tCOinS_date.rftchron = season[input.month]; -- proper name dates | tCOinS_date.rftchron = season[input.month]; -- proper-name dates | ||
elseif 30 < tonumber(input.month) then | |||
tCOinS_date.rftquarter = season[input.month]; -- quarters | |||
else | else | ||
tCOinS_date.rftssn = season[input.month]; -- seasons | tCOinS_date.rftssn = season[input.month]; -- seasons | ||
338行目: | 405行目: | ||
return; | return; | ||
end | end | ||
--[[--------------------------< P A T T E R N S >-------------------------------------------------------------- | |||
this is the list of patterns for date formats that this module recognizes. Approximately the first half of these | |||
patterns represent formats that might be reformatted into another format. Those that might be reformatted have | |||
'indicator' letters that identify the content of the matching capture: 'd' (day), 'm' (month), 'a' (anchor year), | |||
'y' (year); second day, month, year have a '2' suffix. | |||
These patterns are used for both date validation and for reformatting. This table should not be moved to ~/Configuration | |||
because changes to this table require changes to check_date() and to reformatter() and reformat_date() | |||
]] | |||
local patterns = { | |||
-- year-initial numerical year-month-day | |||
['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, | |||
-- month-initial: month day, year | |||
['Mdy'] = {'^(%D-) +([1-9]%d?), +((%d%d%d%d?)%a?)$', 'm', 'd', 'a', 'y'}, | |||
-- month-initial day range: month day–day, year; days are separated by endash | |||
['Md-dy'] = {'^(%D-) +([1-9]%d?)[%-–]([1-9]%d?), +((%d%d%d%d)%a?)$', 'm', 'd', 'd2', 'a', 'y'}, | |||
-- day-initial: day month year | |||
['dMy'] = {'^([1-9]%d?) +(%D-) +((%d%d%d%d?)%a?)$', 'd', 'm', 'a', 'y'}, | |||
-- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed; not supported at en.wiki | |||
-- ['yMd'] = {'^((%d%d%d%d?)%a?) +(%D-) +(%d%d?)$', 'a', 'y', 'm', 'd'}, | |||
-- day-range-initial: day–day month year; days are separated by endash | |||
['d-dMy'] = {'^([1-9]%d?)[%-–]([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'd2', 'm', 'a', 'y'}, | |||
-- day initial month-day-range: day month - day month year; uses spaced endash | |||
['dM-dMy'] = {'^([1-9]%d?) +(%D-) +[%-–] +([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'm', 'd2', 'm2', 'a', 'y'}, | |||
-- month initial month-day-range: month day – month day, year; uses spaced endash | |||
['Md-Mdy'] = {'^(%D-) +([1-9]%d?) +[%-–] +(%D-) +([1-9]%d?), +((%d%d%d%d)%a?)$','m', 'd', 'm2', 'd2', 'a', 'y'}, | |||
-- day initial month-day-year-range: day month year - day month year; uses spaced endash | |||
['dMy-dMy'] = {'^([1-9]%d?) +(%D-) +(%d%d%d%d) +[%-–] +([1-9]%d?) +(%D-) +((%d%d%d%d)%a?)$', 'd', 'm', 'y', 'd2', 'm2', 'a', 'y2'}, | |||
-- month initial month-day-year-range: month day, year – month day, year; uses spaced endash | |||
['Mdy-Mdy'] = {'^(%D-) +([1-9]%d?), +(%d%d%d%d) +[%-–] +(%D-) +([1-9]%d?), +((%d%d%d%d)%a?)$', 'm', 'd', 'y', 'm2', 'd2', 'a', 'y2'}, | |||
-- these date formats cannot be converted, per se, but month name can be rendered short or long | |||
-- month/season year - month/season year; separated by spaced endash | |||
['My-My'] = {'^(%D-) +(%d%d%d%d) +[%-–] +(%D-) +((%d%d%d%d)%a?)$', 'm', 'y', 'm2', 'a', 'y2'}, | |||
-- month/season range year; months separated by endash | |||
['M-My'] = {'^(%D-)[%-–](%D-) +((%d%d%d%d)%a?)$', 'm', 'm2', 'a', 'y'}, | |||
-- month/season year or proper-name year; quarter year when First Quarter YYYY etc. | |||
['My'] = {'^([^%d–]-) +((%d%d%d%d)%a?)$', 'm', 'a', 'y'}, -- this way because endash is a member of %D; %D- will match January–March 2019 when it shouldn't | |||
-- these date formats cannot be converted | |||
['Sy4-y2'] = {'^(%D-) +((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash | |||
['Sy-y'] = {'^(%D-) +(%d%d%d%d)[%-–]((%d%d%d%d)%a?)$'}, -- special case Winter/Summer year-year; year separated with unspaced endash | |||
['y-y'] = {'^(%d%d%d%d?)[%-–]((%d%d%d%d?)%a?)$'}, -- year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | |||
['y4-y2'] = {'^((%d%d)%d%d)[%-–]((%d%d)%a?)$'}, -- year range: YYYY–YY; separated by unspaced endash | |||
['y'] = {'^((%d%d%d%d?)%a?)$'}, -- year; here accept either YYY or YYYY | |||
} | |||
348行目: | 466行目: | ||
If the date fails the format tests, this function returns false and does not return values for anchor_year and | If the date fails the format tests, this function returns false and does not return values for anchor_year and | ||
COinS_date. When this happens, the date parameter is used in the COinS metadata and the CITEREF identifier gets | COinS_date. When this happens, the date parameter is (DEBUG: not?) used in the COinS metadata and the CITEREF identifier gets | ||
its year from the year parameter if present otherwise CITEREF does not get a date value. | its year from the year parameter if present otherwise CITEREF does not get a date value. | ||
Inputs: | Inputs: | ||
date_string - date string from date-holding parameters (date, year, | date_string - date string from date-holding parameters (date, year, publication-date, access-date, pmc-embargo-date, archive-date, lay-date) | ||
Returns: | Returns: | ||
362行目: | 480行目: | ||
]] | ]] | ||
local function check_date (date_string, tCOinS_date | local function check_date (date_string, param, tCOinS_date) | ||
local year; | local year; -- assume that year2, months, and days are not used; | ||
local year2=0; | local year2 = 0; -- second year in a year range | ||
local month=0; | local month = 0; | ||
local month2=0; | local month2 = 0; -- second month in a month range | ||
local day=0; | local day = 0; | ||
local day2=0; | local day2 = 0; -- second day in a day range | ||
local anchor_year; | local anchor_year; | ||
local coins_date; | local coins_date; | ||
if date_string:match( | if date_string:match (patterns['ymd'][1]) then -- year-initial numerical year month day format | ||
year, month, day= | year, month, day = date_string:match (patterns['ymd'][1]); | ||
if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or 0 == tonumber(day) then return false; end -- month or day number not valid or not Gregorian calendar | if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or 0 == tonumber(day) then return false; end -- month or day number not valid or not Gregorian calendar | ||
anchor_year = year; | anchor_year = year; | ||
elseif mw.ustring.match(date_string, | |||
month, day, anchor_year, year=mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Mdy'][1]) then -- month-initial: month day, year | ||
month, day, anchor_year, year = mw.ustring.match(date_string, patterns['Mdy'][1]); | |||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Md-dy'][1]) then -- month-initial day range: month day–day, year; days are separated by endash | ||
month, day, day2, anchor_year, year=mw.ustring.match(date_string, | month, day, day2, anchor_year, year = mw.ustring.match(date_string, patterns['Md-dy'][1]); | ||
if tonumber(day) >= tonumber(day2) then return false; end | if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | ||
month = get_month_number (month); | |||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months | |||
month2=month; -- for metadata | |||
year2 = year; | |||
elseif mw.ustring.match(date_string, patterns['dMy'][1]) then -- day-initial: day month year | |||
day, month, anchor_year, year = mw.ustring.match(date_string, patterns['dMy'][1]); | |||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
elseif mw.ustring.match(date_string, | --[[ NOT supported at en.wiki | ||
elseif mw.ustring.match(date_string, patterns['yMd'][1]) then -- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed | |||
anchor_year, year, month, day = mw.ustring.match(date_string, patterns['yMd'][1]); | |||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
-- end NOT supported at en.wiki ]] | |||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['d-dMy'][1]) then -- day-range-initial: day–day month year; days are separated by endash | ||
day, day2, month, anchor_year, year=mw.ustring.match(date_string, | day, day2, month, anchor_year, year = mw.ustring.match(date_string, patterns['d-dMy'][1]); | ||
if tonumber(day) >= tonumber(day2) then return false; end | if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; | ||
month = get_month_number (month); | month = get_month_number (month); | ||
if 0 == month then return false; end | if 0 == month then return false; end -- return false if month text isn't one of the twelve months | ||
month2=month; | month2 = month; -- for metadata | ||
year2=year; | year2 = year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['dM-dMy'][1]) then -- day initial month-day-range: day month - day month year; uses spaced endash | ||
day, month, day2, month2, anchor_year, year=mw.ustring.match(date_string, | day, month, day2, month2, anchor_year, year = mw.ustring.match(date_string, patterns['dM-dMy'][1]); | ||
if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; | if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; | ||
month = get_month_number (month); | month = get_month_number (month); -- for metadata | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
year2=year; | year2 = year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Md-Mdy'][1]) then -- month initial month-day-range: month day – month day, year; uses spaced endash | ||
month, day, month2, day2, anchor_year, year=mw.ustring.match(date_string, | month, day, month2, day2, anchor_year, year = mw.ustring.match(date_string, patterns['Md-Mdy'][1]); | ||
if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end | if (not is_valid_month_season_range(month, month2, param)) or not is_valid_year(year) then return false; end | ||
month = get_month_number (month); | month = get_month_number (month); -- for metadata | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
year2=year; | year2 = year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['dMy-dMy'][1]) then -- day initial month-day-year-range: day month year - day month year; uses spaced endash | ||
day, month, year, day2, month2, anchor_year, year2=mw.ustring.match(date_string, | day, month, year, day2, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['dMy-dMy'][1]); | ||
if tonumber(year2) <= tonumber(year) then return false; end | if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | ||
month = get_month_number (month); | month = get_month_number (month); -- for metadata | ||
month2 = get_month_number (month2); | month2 = get_month_number (month2); | ||
if 0 == month or 0 == month2 then return false; end -- both must be valid | |||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Mdy-Mdy'][1]) then -- month initial month-day-year-range: month day, year – month day, year; uses spaced endash | ||
month, day, year, month2, day2, anchor_year, year2=mw.ustring.match(date_string, | month, day, year, month2, day2, anchor_year, year2 = mw.ustring.match(date_string, patterns['Mdy-Mdy'][1]); | ||
if tonumber(year2) <= tonumber(year) then return false; end | if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style | ||
month = get_month_number (month); | month = get_month_number (month); -- for metadata | ||
month2 = get_month_number (month2); | month2 = get_month_number(month2); | ||
if 0 == month or 0 == month2 then return false; end -- both must be valid | |||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Sy4-y2'][1]) then -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash | ||
local century; | local century; | ||
month, year, century, anchor_year, year2=mw.ustring.match(date_string, | month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy4-y2'][1]); | ||
if 'Winter' ~= month and 'Summer' ~= month then return false end; | if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer | ||
anchor_year=year..'–'..anchor_year; | anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | ||
year2 = century..year2; | year2 = century..year2; -- add the century to year2 for comparisons | ||
if 1 ~= tonumber(year2) - tonumber(year) then return false; end | if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) then return false; end | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
month = get_season_number (month); | month = get_season_number(month, param); | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash | ||
month, year, anchor_year, year2=mw.ustring.match(date_string, | month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns['Sy-y'][1]); | ||
if 'Winter' ~= month and 'Summer' ~= month then return false end; | if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer | ||
anchor_year=year..'–'..anchor_year; | anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | ||
if 1 ~= tonumber(year2) - tonumber(year) then return false; end | if 1 ~= tonumber(year2) - tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later | ||
if not is_valid_year(year2) then return false; end | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
month = get_season_number (month); | month = get_season_number (month, param); -- for metadata | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash | ||
month, year, month2, anchor_year, year2=mw.ustring.match(date_string, | month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns['My-My'][1]); | ||
anchor_year=year..'–'..anchor_year; | anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years | ||
if tonumber(year) >= tonumber(year2) then return false; end | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
if 0 ~= get_month_number(month) and 0 ~= get_month_number(month2) and is_valid_month_range_style(month, month2) then -- both must be month year, same month style | if 0 ~= get_month_number(month) and 0 ~= get_month_number(month2) and is_valid_month_range_style(month, month2) then -- both must be month year, same month style | ||
month = get_month_number(month); | month = get_month_number(month); | ||
month2 = get_month_number(month2); | month2 = get_month_number(month2); | ||
elseif 0 ~= get_season_number(month) and 0 ~= get_season_number(month2) then | elseif 0 ~= get_season_number(month, param) and 0 ~= get_season_number(month2, param) then -- both must be season year, not mixed | ||
month = get_season_number(month); | month = get_season_number(month, param); | ||
month2 = get_season_number(month2); | month2 = get_season_number(month2, param); | ||
else | else | ||
return false; | return false; | ||
end | end | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['M-My'][1]) then -- month/season range year; months separated by endash | ||
month, month2, anchor_year, year=mw.ustring.match(date_string, | month, month2, anchor_year, year = mw.ustring.match(date_string, patterns['M-My'][1]); | ||
if (not is_valid_month_season_range(month, month2)) or (not is_valid_year(year)) then return false; end | if (not is_valid_month_season_range(month, month2, param)) or (not is_valid_year(year)) then return false; end | ||
if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season | if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season | ||
month = get_month_number(month); | month = get_month_number(month); | ||
month2 = get_month_number(month2); | month2 = get_month_number(month2); | ||
if 0 == month or 0 == month2 then return false; end | |||
else | else | ||
month = get_season_number(month); | month = get_season_number(month, param); | ||
month2 = get_season_number(month2); | month2 = get_season_number(month2, param); | ||
end | end | ||
year2=year; | year2 = year; | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['My'][1]) then -- month/season/quarter/proper-name year | ||
month, anchor_year, year=mw.ustring.match(date_string, | month, anchor_year, year = mw.ustring.match(date_string, patterns['My'][1]); | ||
if not is_valid_year(year) then return false; end | if not is_valid_year(year) then return false; end | ||
month = get_element_number(month, param); -- get month season quarter proper-name number or nil | |||
if not month then return false; end -- not valid whatever it is | |||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 | ||
year, anchor_year, year2=mw.ustring.match(date_string, | year, anchor_year, year2 = mw.ustring.match(date_string, patterns['y-y'][1]); | ||
anchor_year=year..'–'..anchor_year; -- assemble anchor year from both years | anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years | ||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif mw.ustring.match(date_string, | elseif mw.ustring.match(date_string, patterns['y4-y2'][1]) then -- Year range: YYYY–YY; separated by unspaced endash | ||
local century; | local century; | ||
year, century, anchor_year, year2=mw.ustring.match(date_string, | year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns['y4-y2'][1]); | ||
anchor_year=year..'–'..anchor_year; | anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years | ||
if in_array (param, {'date', 'publication-date', 'year'}) then | |||
add_prop_cat ('year_range_abbreviated'); | |||
end | |||
if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003 | if 13 > tonumber(year2) then return false; end -- don't allow 2003-05 which might be May 2003 | ||
year2 = century..year2; -- add the century to year2 for comparisons | year2 = century .. year2; -- add the century to year2 for comparisons | ||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | if not is_valid_year(year2) then return false; end -- no year farther in the future than next year | ||
elseif | elseif mw.ustring.match(date_string, patterns['y'][1]) then -- year; here accept either YYY or YYYY | ||
anchor_year, year= | anchor_year, year = mw.ustring.match(date_string, patterns['y'][1]); | ||
if false == is_valid_year(year) then | if false == is_valid_year(year) then | ||
return false; | return false; | ||
512行目: | 640行目: | ||
end | end | ||
if | if 'access-date' == param then -- test accessdate here because we have numerical date parts | ||
if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required | if 0 ~= year and 0 ~= month and 0 ~= day and -- all parts of a single date required | ||
0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range | 0 == year2 and 0 == month2 and 0 == day2 then -- none of these; accessdate must not be a range | ||
if not is_valid_accessdate (year..'-'..month..'-'..day) then | if not is_valid_accessdate(year .. '-' .. month .. '-' .. day) then | ||
return false; -- return false when accessdate out of bounds | return false; -- return false when accessdate out of bounds | ||
end | end | ||
525行目: | 653行目: | ||
local result=true; -- check whole dates for validity; assume true because not all dates will go through this test | local result=true; -- check whole dates for validity; assume true because not all dates will go through this test | ||
if 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 == day2 then -- YMD (simple whole date) | if 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 == day2 then -- YMD (simple whole date) | ||
result=is_valid_date(year,month,day); | result = is_valid_date(year, month, day); | ||
elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 ~= day2 then -- YMD-d (day range) | elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 == month2 and 0 ~= day2 then -- YMD-d (day range) | ||
result=is_valid_date(year,month,day); | result = is_valid_date(year, month, day); | ||
result=result and is_valid_date(year,month,day2); | result = result and is_valid_date(year, month, day2); | ||
elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-md (day month range) | elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 == year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-md (day month range) | ||
result=is_valid_date(year,month,day); | result = is_valid_date(year, month, day); | ||
result=result and is_valid_date(year,month2,day2); | result = result and is_valid_date(year, month2, day2); | ||
elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 ~= year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-ymd (day month year range) | elseif 0 ~= year and 0 ~= month and 0 ~= day and 0 ~= year2 and 0 ~= month2 and 0 ~= day2 then -- YMD-ymd (day month year range) | ||
result=is_valid_date(year,month,day); | result = is_valid_date(year, month, day); | ||
result=result and is_valid_date(year2,month2,day2); | result = result and is_valid_date(year2, month2, day2); | ||
end | end | ||
543行目: | 671行目: | ||
if nil ~= tCOinS_date then -- this table only passed into this function when testing |date= parameter values | if nil ~= tCOinS_date then -- this table only passed into this function when testing |date= parameter values | ||
make_COinS_date ({year=year, month=month, day=day, year2=year2, month2=month2, day2=day2}, tCOinS_date); -- make an ISO 8601 date string for COinS | make_COinS_date ({year = year, month = month, day = day, year2 = year2, month2 = month2, day2 = day2}, tCOinS_date); -- make an ISO 8601 date string for COinS | ||
end | end | ||
557行目: | 685行目: | ||
Unlike most error messages created in this module, only one error message is created by this function. Because all of the date holding parameters are processed serially, | Unlike most error messages created in this module, only one error message is created by this function. Because all of the date holding parameters are processed serially, | ||
parameters with errors are added to the <error_list> sequence table as the dates are tested. | |||
]] | ]] | ||
local function dates(date_parameters_list, tCOinS_date) | local function dates(date_parameters_list, tCOinS_date, error_list) | ||
local anchor_year; | local anchor_year; -- will return as nil if the date being tested is not |date= | ||
local COinS_date; | local COinS_date; -- will return as nil if the date being tested is not |date= | ||
local embargo_date; -- if embargo date is a good dmy, mdy, ymd date then holds original value else reset to 9999 | local embargo_date; -- if embargo date is a good dmy, mdy, ymd date then holds original value else reset to 9999 | ||
local good_date = false; | local good_date = false; | ||
for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for k, v in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set(v) then | if is_set(v.val) then -- if the parameter has a value | ||
if v:match("^c%. [1-9]%d%d%d?%a?$") then | v.val = mw.ustring.gsub(v.val, '%d', cfg.date_names.local_digits); -- translate 'local' digits to Western 0-9 | ||
if v.val:match("^c%. [1-9]%d%d%d?%a?$") then -- special case for c. year or with or without CITEREF disambiguator - only |date= and |year= | |||
local year = v:match("c%. ([1-9]%d%d%d?)%a?"); | local year = v.val:match("c%. ([1-9]%d%d%d?)%a?"); -- get the year portion so it can be tested | ||
if 'date'==k then | if 'date' == k then | ||
anchor_year, COinS_date = v:match("((c%. [1-9]%d%d%d?)%a?)"); -- anchor year and COinS_date only from |date= parameter | anchor_year, COinS_date = v.val:match("((c%. [1-9]%d%d%d?)%a?)"); -- anchor year and COinS_date only from |date= parameter | ||
good_date = is_valid_year(year); | good_date = is_valid_year(year); | ||
elseif 'year'==k then | elseif 'year' == k then | ||
good_date = is_valid_year(year); | good_date = is_valid_year(year); | ||
end | end | ||
elseif 'date'==k then -- if the parameter is |date= | elseif 'date' == k then -- if the parameter is |date= | ||
if v:match("^n%.d%.%a?$") then | if v.val:match("^n%.d%.%a?$") then -- ToDo: I18N -- if |date=n.d. with or without a CITEREF disambiguator | ||
good_date, anchor_year, COinS_date = true, v:match("((n%.d%.)%a?)"); --"n.d."; no error when date parameter is set to no date | good_date, anchor_year, COinS_date = true, v.val:match("((n%.d%.)%a?)"); -- ToDo: I18N -- "n.d."; no error when date parameter is set to no date | ||
elseif v:match("^nd%a?$") then | elseif v.val:match("^nd%a?$") then -- ToDo: I18N -- if |date=nd with or without a CITEREF disambiguator | ||
good_date, anchor_year, COinS_date = true, v:match("((nd)%a?)"); --"nd"; no error when date parameter is set to no date | good_date, anchor_year, COinS_date = true, v.val:match("((nd)%a?)"); -- ToDo: I18N -- "nd"; no error when date parameter is set to no date | ||
else | else | ||
good_date, anchor_year, COinS_date = check_date (v, tCOinS_date); -- go test the date | good_date, anchor_year, COinS_date = check_date (v.val, k, tCOinS_date); -- go test the date | ||
end | end | ||
elseif 'year'==k then -- if the parameter is |year= it should hold only a year value | elseif 'year' == k then -- if the parameter is |year= it should hold only a year value | ||
if v:match("^[1-9]%d%d%d?%a?$") then | if v.val:match("^[1-9]%d%d%d?%a?$") then -- if |year = 3 or 4 digits only with or without a CITEREF disambiguator | ||
good_date, anchor_year, COinS_date = true, v:match("((%d+)%a?)"); | good_date, anchor_year, COinS_date = true, v.val:match("((%d+)%a?)"); | ||
end | end | ||
elseif ' | elseif 'pmc-embargo-date' == k then -- if the parameter is |pmc-embargo-date= | ||
good_date = check_date (v, | good_date = check_date (v.val, k); -- go test the date | ||
if true == good_date then -- if the date is a valid date | if true == good_date then -- if the date is a valid date | ||
good_date, embargo_date = is_valid_embargo_date (v); | good_date, embargo_date = is_valid_embargo_date (v.val); -- is |pmc-embargo-date= date a single dmy, mdy, or ymd formatted date? yes: returns embargo; no: returns 9999 | ||
end | end | ||
else -- any other date-holding parameter | else -- any other date-holding parameter | ||
good_date = check_date (v); | good_date = check_date (v.val, k); -- go test the date | ||
end | end | ||
if false==good_date then -- assemble one error message so we don't add the tracking category multiple times | if false == good_date then -- assemble one error message so we don't add the tracking category multiple times | ||
table.insert (error_list, wrap_style ('parameter', v.name)); -- make parameter name suitable for error message list | |||
end | end | ||
end | end | ||
end | end | ||
return anchor_year, embargo_date | return anchor_year, embargo_date; -- and done | ||
end | end | ||
645行目: | 767行目: | ||
end | end | ||
elseif mw.ustring.match(date_string, "%d%d%d%d[%-–]%d%d") then | elseif mw.ustring.match(date_string, "%d%d%d%d[%-–]%d%d") then -- YYYY-YY date ranges | ||
local century; | local century; | ||
date1, century, date2 = mw.ustring.match(date_string, "((%d%d)%d%d)[%-–]+(%d%d)"); | date1, century, date2 = mw.ustring.match(date_string, "((%d%d)%d%d)[%-–]+(%d%d)"); | ||
665行目: | 787行目: | ||
--[[-------------------------< R E F O R M A T | --[[--------------------------< R E F O R M A T T E R >-------------------------------------------------------- | ||
reformat 'date' into new format specified by format_param if pattern_idx (the current format of 'date') can be | |||
reformatted. Does the grunt work for reformat_dates(). | |||
]] | The table re_formats maps pattern_idx (current format) and format_param (desired format) to a table that holds: | ||
format string used by string.format() | |||
identifier letters ('d', 'm', 'y', 'd2', 'm2', 'y2') that serve as indexes into a table t{} that holds captures | |||
from mw.ustring.match() for the various date parts specified by patterns[pattern_idx][1] | |||
Items in patterns{} have the general form: | |||
[' | ['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, where: | ||
['ymd'] is pattern_idx | |||
patterns['ymd'][1] is the match pattern with captures for mw.ustring.match() | |||
patterns['ymd'][2] is an indicator letter identifying the content of the first capture | |||
patterns['ymd'][3] ... the second capture etc. | |||
when a pattern matches a date, the captures are loaded into table t{} in capture order using the idemtifier | |||
characters as indexes into t{} For the above, a ymd date is in t{} as: | |||
t.y = first capture (year), t.m = second capture (month), t.d = third capture (day) | |||
To reformat, this function is called with the pattern_idx that matches the current format of the date and with | |||
format_param set to the desired format. This function loads table t{} as described and then calls string.format() | |||
with the format string specified by re_format[pattern_idx][format_param][1] using values taken from t{} according | |||
to the capture identifier letters specified by patterns[pattern_idx][format_param][n] where n is 2.. | |||
]] | ]] | ||
local | local re_formats = { | ||
['ymd'] = { -- date format is ymd; reformat to: | |||
['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy | |||
['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy | |||
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki | |||
}, | |||
['Mdy'] = { -- date format is Mdy; reformat to: | |||
['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- for long/short reformatting | |||
['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy | |||
['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd | |||
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki | |||
}, | |||
['dMy'] = { -- date format is dMy; reformat to: | |||
['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- for long/short reformatting | |||
['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy | |||
['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd | |||
-- ['yMd'] = {'%s %s %s', 'y', 'm', 'd'}, -- |df=yMd; not supported at en.wiki | |||
}, | |||
['Md-dy'] = { -- date format is Md-dy; reformat to: | |||
['mdy'] = {'%s %s–%s, %s', 'm', 'd', 'd2', 'y'}, -- for long/short reformatting | |||
['dmy'] = {'%s–%s %s %s', 'd', 'd2', 'm', 'y'}, -- |df=dmy -> d-dMy | |||
}, | |||
['d-dMy'] = { -- date format is d-d>y; reformat to: | |||
['dmy'] = {'%s–%s %s %s', 'd', 'd2', 'm', 'y'}, -- for long/short reformatting | |||
['mdy'] = {'%s %s–%s, %s', 'm', 'd', 'd2', 'y'}, -- |df=mdy -> Md-dy | |||
}, | |||
['dM-dMy'] = { -- date format is dM-dMy; reformat to: | |||
['dmy'] = {'%s %s – %s %s %s', 'd', 'm', 'd2', 'm2', 'y'}, -- for long/short reformatting | |||
['mdy'] = {'%s %s – %s %s, %s', 'm', 'd', 'm2', 'd2', 'y'}, -- |df=mdy -> Md-Mdy | |||
}, | |||
['Md-Mdy'] = { -- date format is Md-Mdy; reformat to: | |||
['mdy'] = {'%s %s – %s %s, %s', 'm', 'd', 'm2', 'd2', 'y'}, -- for long/short reformatting | |||
['dmy'] = {'%s %s – %s %s %s', 'd', 'm', 'd2', 'm2', 'y'}, -- |df=dmy -> dM-dMy | |||
}, | |||
['dMy-dMy'] = { -- date format is dMy-dMy; reformat to: | |||
['dmy'] = {'%s %s %s – %s %s %s', 'd', 'm', 'y', 'd2', 'm2', 'y2'}, -- for long/short reformatting | |||
['mdy'] = {'%s %s, %s – %s %s, %s', 'm', 'd', 'y', 'm2', 'd2', 'y2'}, -- |df=mdy -> Mdy-Mdy | |||
}, | |||
['Mdy-Mdy'] = { -- date format is Mdy-Mdy; reformat to: | |||
['mdy'] = {'%s %s, %s – %s %s, %s', 'm', 'd', 'y', 'm2', 'd2', 'y2'}, -- for long/short reformatting | |||
['dmy'] = {'%s %s %s – %s %s %s', 'd', 'm', 'y', 'd2', 'm2', 'y2'}, -- |df=dmy -> dMy-dMy | |||
}, | |||
['My-My'] = { -- these for long/short reformatting | |||
['any'] = {'%s %s – %s %s', 'm', 'y', 'm2', 'y2'}, -- dmy/mdy agnostic | |||
}, | |||
['M-My'] = { -- these for long/short reformatting | |||
['any'] = {'%s–%s %s', 'm', 'm2', 'y'}, -- dmy/mdy agnostic | |||
}, | |||
['My'] = { -- these for long/short reformatting | |||
['any'] = {'%s %s', 'm', 'y'}, -- dmy/mdy agnostic | |||
}, | |||
-- ['yMd'] = { -- not supported at en.wiki | |||
-- ['mdy'] = {'%s %s, %s', 'm', 'd', 'y'}, -- |df=mdy | |||
-- ['dmy'] = {'%s %s %s', 'd', 'm', 'y'}, -- |df=dmy | |||
-- ['ymd'] = {'%s-%s-%s', 'y', 'm', 'd'}, -- |df=ymd | |||
-- }, | |||
} | |||
-- | local function reformatter (date, pattern_idx, format_param, mon_len) | ||
if not in_array (pattern_idx, {'ymd', 'Mdy', 'Md-dy', 'dMy', 'yMd', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then | |||
return; -- not in this set of date format patterns then not a reformattable date | |||
end | |||
if 'ymd' == format_param and in_array (pattern_idx, {'ymd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then | |||
return; -- ymd date ranges not supported at en.wiki; no point in reformatting ymd to ymd | |||
end | |||
if in_array (pattern_idx, {'My', 'M-My', 'My-My'}) then -- these are not dmy/mdy so can't be 'reformatted' into either | |||
format_param = 'any'; -- so format-agnostic | |||
end | |||
]] | -- yMd is not supported at en.wiki | ||
-- if yMd is supported at your wiki, uncomment the next line | |||
-- if 'yMd' == format_param and in_array (pattern_idx, {'yMd', 'Md-dy', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy'}) then -- these formats not convertable; yMd not supported at en.wiki | |||
-- if yMd is supported at your wiki, remove or comment-out the next line | |||
if 'yMd' == format_param then -- yMd not supported at en.wiki | |||
return; -- not a reformattable date | |||
end | |||
local c1, c2, c3, c4, c5, c6, c7; -- these hold the captures specified in patterns[pattern_idx][1] | |||
c1, c2, c3, c4, c5, c6, c7 = mw.ustring.match (date, patterns[pattern_idx][1]); -- get the captures | |||
local | local t = { -- table that holds k/v pairs of date parts from the captures and patterns[pattern_idx][2..] | ||
[patterns[pattern_idx][2]] = c1; -- at minimum there is always one capture with a matching indicator letter | |||
[patterns[pattern_idx][3] or 'x'] = c2; -- patterns can have a variable number of captures; each capture requires an indicator letter; | |||
[patterns[pattern_idx][4] or 'x'] = c3; -- where there is no capture, there is no indicator letter so n in patterns[pattern_idx][n] will be nil; | |||
[patterns[pattern_idx][5] or 'x'] = c4; -- the 'x' here spoofs an indicator letter to prevent 'table index is nil' error | |||
[patterns[pattern_idx][6] or 'x'] = c5; | |||
[patterns[pattern_idx][7] or 'x'] = c6; | |||
[patterns[pattern_idx][8] or 'x'] = c7; | |||
}; | |||
if t.a then -- if this date has an anchor year capture | |||
t.y = t.a; -- use the anchor year capture when reassembling the date | |||
end | |||
--[[-------------- | if tonumber(t.m) then -- if raw month is a number (converting from ymd) | ||
if 's' == mon_len then -- if we are to use abbreviated month names | |||
t.m = cfg.date_names['inv_local_s'][tonumber(t.m)]; -- convert it to a month name | |||
else | |||
t.m = cfg.date_names['inv_local_l'][tonumber(t.m)]; -- convert it to a month name | |||
end | |||
t.d = t.d:gsub ('0(%d)', '%1'); -- strip leading '0' from day if present | |||
elseif 'ymd' == format_param then -- when converting to ymd | |||
if 1582 > tonumber(t.y) then -- ymd format dates not allowed before 1582 | |||
return; | |||
end | |||
t.m = string.format ('%02d', get_month_number (t.m)); -- make sure that month and day are two digits | |||
t.d = string.format ('%02d', t.d); | |||
elseif mon_len then -- if mon_len is set to either 'short' or 'long' | |||
for _, mon in ipairs ({'m', 'm2'}) do -- because there can be two month names, check both | |||
if t[mon] then | |||
t[mon] = get_month_number (t[mon]); -- get the month number for this month (is length agnostic) | |||
if 0 == t[mon] then return; end -- seasons and named dates can't be converted | |||
t[mon] = (('s' == mon_len) and cfg.date_names['inv_local_s'][t[mon]]) or cfg.date_names['inv_local_l'][t[mon]]; -- fetch month name according to length | |||
end | |||
end | |||
end | |||
]] | local new_date = string.format (re_formats[pattern_idx][format_param][1], -- format string | ||
t[re_formats[pattern_idx][format_param][2]], -- named captures from t{} | |||
t[re_formats[pattern_idx][format_param][3]], | |||
t[re_formats[pattern_idx][format_param][4]], | |||
t[re_formats[pattern_idx][format_param][5]], | |||
t[re_formats[pattern_idx][format_param][6]], | |||
t[re_formats[pattern_idx][format_param][7]], | |||
t[re_formats[pattern_idx][format_param][8]] | |||
); | |||
return new_date; | |||
end | end | ||
727行目: | 951行目: | ||
--[[-------------------------< R E F O R M A T _ D A T E S >-------------------------------------------------- | --[[-------------------------< R E F O R M A T _ D A T E S >-------------------------------------------------- | ||
Reformats existing dates into the format specified by format | Reformats existing dates into the format specified by format. | ||
format is one of several keywords: dmy, dmy-all, mdy, mdy-all, ymd, ymd-all. The all version includes access- and | format is one of several manual keywords: dmy, dmy-all, mdy, mdy-all, ymd, ymd-all. The -all version includes | ||
archive-dates; otherwise these dates are not reformatted | access- and archive-dates; otherwise these dates are not reformatted. | ||
This function allows automatic date formatting. In ~/Configuration, the article source is searched for one of | |||
the {{use xxx dates}} templates. If found, xxx becomes the global date format as xxx-all. If |cs1-dates= in | |||
{{use xxx dates}} has legitimate value then that value determines how cs1|2 dates will be rendered. Legitimate | |||
values for |cs1-dates= are: | |||
l - all dates are rendered with long month names | |||
ls - publication dates use long month names; access-/archive-dates use abbreviated month names | |||
ly - publication dates use long month names; access-/archive-dates rendered in ymd format | |||
s - all dates are rendered with abbreviated (short) month names | |||
sy - publication dates use abbreviated month names; access-/archive-dates rendered in ymd format | |||
y - all dates are rendered in ymd format | |||
the format argument for automatic date formatting will be the format specified by {{use xxx dates}} with the | |||
value supplied by |cs1-dates so one of: xxx-l, xxx-ls, xxx-ly, xxx-s, xxx-sy, xxx-y, or simply xxx (|cs1-dates= | |||
empty, omitted, or invalid) where xxx shall be either of dmy or mdy. | |||
dates are extracted from date_parameters_list, reformatted (if appropriate), and then written back into the | |||
list in the new format. Dates in date_parameters_list are presumed here to be valid (no errors). This function | |||
returns true when a date has been reformatted, false else. Actual reformatting is done by reformatter(). | |||
]] | ]] | ||
local function reformat_dates (date_parameters_list, format | local function reformat_dates (date_parameters_list, format) | ||
local all = false; -- set to false to skip access- and archive-dates | local all = false; -- set to false to skip access- and archive-dates | ||
local | local len_p = 'l'; -- default publication date length shall be long | ||
local | local len_a = 'l'; -- default access-/archive-date length shall be long | ||
local result = false; | |||
local new_date; | |||
if format:match('%a+%-all') then | if format:match('%a+%-all') then -- manual df keyword; auto df keyword when length not specified in {{use xxx dates}}; | ||
format = format:match('(%a+)%-all'); -- extract the format | format = format:match('(%a+)%-all'); -- extract the format | ||
all = true; -- | all = true; -- all dates are long format dates because this keyword doesn't specify length | ||
end | elseif format:match('%a+%-[lsy][sy]?') then -- auto df keywords; internal only | ||
all = true; -- auto df applies to all dates; use length specified by capture len_p for all dates | |||
format, len_p, len_a = format:match('(%a+)%-([lsy])([sy]?)'); -- extract the format and length keywords | |||
if 'y' == len_p then -- because allowed by MOS:DATEUNIFY (sort of) range dates and My dates not reformatted | |||
format = 'ymd'; -- override {{use xxx dates}} | |||
elseif (not is_set(len_a)) or (len_p == len_a) then -- no access-/archive-date length specified or same length as publication dates then | |||
len_a = len_p; -- in case len_a not set | |||
end | |||
end -- else only publication dates and they are long | |||
for param_name, param_val in pairs (date_parameters_list) do -- for each date-holding parameter in the list | for param_name, param_val in pairs (date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set (param_val) then | if is_set (param_val.val) then -- if the parameter has a value | ||
if not all and in_array (param_name, {'access-date', 'archive-date'}) then -- | if not (not all and in_array (param_name, {'access-date', 'archive-date'})) then -- skip access- or archive-date unless format is xxx-all; yeah, ugly; TODO: find a better way | ||
for pattern_idx, pattern in pairs (patterns) do | |||
if mw.ustring.match (param_val.val, pattern[1]) then | |||
if all and in_array (param_name, {'access-date', 'archive-date'}) then -- if this date is an access- or archive-date | |||
new_date = reformatter (param_val.val, pattern_idx, (('y' == len_a) and 'ymd') or format, len_a); -- choose ymd or dmy/mdy according to len_a setting | |||
else -- all other dates | |||
new_date = reformatter (param_val.val, pattern_idx, format, len_p); | |||
end | |||
if new_date then -- set when date was reformatted | |||
date_parameters_list[param_name].val = new_date; -- update date in date list | |||
result = true; -- and announce that changes have been made | |||
end | |||
end -- if | |||
end -- for | |||
end -- if | |||
end -- if | |||
end -- for | |||
return result; -- declare boolean result and done | |||
end | |||
end | |||
end | |||
end | |||
end | end | ||
799行目: | 1,034行目: | ||
local n; | local n; | ||
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if not | if is_set (param_val.val) then | ||
if not mw.ustring.match (param_val.val, '%d%d%d%d%-%d%d%-%d%d') then -- for those that are not ymd dates (ustring because here digits may not be Western) | |||
param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash | |||
if 0 ~= n then | |||
date_parameters_list[param_name].val = param_val.val; -- update the list | |||
result = true; | |||
end | |||
end | end | ||
end | end | ||
815行目: | 1,052行目: | ||
Attempts to translate English month names to local-language month names using names supplied by MediaWiki's | Attempts to translate English month names to local-language month names using names supplied by MediaWiki's | ||
date parser function. This is simple name-for-name replacement and may not work for all languages. | date parser function. This is simple name-for-name replacement and may not work for all languages. | ||
if xlat_dig is true, this function will also translate Western (English) digits to the local language's digits. | |||
This will also translate ymd dates. | |||
]] | ]] | ||
local function date_name_xlate (date_parameters_list) | local function date_name_xlate (date_parameters_list, xlt_dig) | ||
local xlate; | local xlate; | ||
local mode; -- long or short month names | local mode; -- long or short month names | ||
825行目: | 1,065行目: | ||
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list | ||
if is_set(param_val) then | if is_set(param_val.val) then -- if the parameter has a value | ||
date = param_val; | date = param_val.val; | ||
for month in mw.ustring.gmatch (date, '%a+') do -- iterate through all dates in the date (single date or date range) | for month in mw.ustring.gmatch (date, '%a+') do -- iterate through all dates in the date (single date or date range) | ||
if cfg.date_names.en.long[month] then | if cfg.date_names.en.long[month] then | ||
836行目: | 1,076行目: | ||
end | end | ||
if mode then | if mode then -- might be a season | ||
xlate = | xlate = lang_object:formatDate(mode, '1' .. month); -- translate the month name to this local language | ||
date = mw.ustring.gsub (date, month, xlate); | date = mw.ustring.gsub (date, month, xlate); -- replace the English with the translation | ||
date_parameters_list[param_name] = date; | date_parameters_list[param_name].val = date; -- save the translated date | ||
modified = true; | modified = true; | ||
end | end | ||
end | end | ||
if xlt_dig then -- shall we also translate digits? | |||
date = date:gsub ('%d', cfg.date_names.xlate_digits); -- translate digits from Western to 'local digits' | |||
date_parameters_list[param_name].val = date; -- save the translated date | |||
modified = true; | |||
end | |||
end | end | ||
end | end | ||
858行目: | 1,103行目: | ||
local function set_selected_modules (cfg_table_ptr, utilities_page_ptr) | local function set_selected_modules (cfg_table_ptr, utilities_page_ptr) | ||
add_prop_cat = utilities_page_ptr.add_prop_cat ; -- import functions from selected Module:Citation/CS1/Utilities module | |||
in_array = utilities_page_ptr.in_array; | is_set = utilities_page_ptr.is_set; | ||
in_array = utilities_page_ptr.in_array; | |||
-- set_message = utilities_page_ptr.set_message; | |||
wrap_style = utilities_page_ptr.wrap_style; | |||
cfg = cfg_table_ptr; -- import tables from selected Module:Citation/CS1/Configuration | cfg = cfg_table_ptr; -- import tables from selected Module:Citation/CS1/Configuration | ||
end | end | ||
--[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------ | |||
]] | |||
return { -- return exported functions | return { -- return exported functions |