-- Module:Process ask query results 1
-- Documentation for this module may be created at Module:Process ask query results 1/doc
local p = {}

-- Property names that are looked for inside the first page's property list.
local PROPERTY_NAMES = {
    "Newspaper Item Number",
    "Newspaper Title",
    "Newspaper Page",
    "Newspaper Column",
    "Publication Date",
    "Newspaper Full Text",
    "Newspaper Excerpt",
    "Newspaper Summary",
    "Newspaper Online Source",
    "Mentions",
}

--- Builds "<ul><li>heading</li><li>item 1</li>...</ul>".
-- @param heading text of the first list item
-- @param items   sequence of strings, one per further list item (may be empty)
-- @return the assembled HTML string
local function html_list(heading, items)
    local buf = { "<ul><li>", heading, "</li>" }
    for _, item in ipairs(items) do
        buf[#buf + 1] = "<li>" .. item .. "</li>"
    end
    buf[#buf + 1] = "</ul>"
    return table.concat(buf)
end

--- Strips the wrapping around a property list: surrounding whitespace,
-- one leading "(", one trailing "," and then one trailing ")".
-- @param text raw substring that follows a page name
-- @return the property list without its wrapping
local function strip_list_wrapping(text)
    -- single-target assignment keeps only gsub's first result (the string)
    text = string.gsub(text, "^%s*(.-)%s*$", "%1")
    if string.sub(text, 1, 1) == "(" then
        text = string.sub(text, 2)
    end
    if string.sub(text, -1, -1) == "," then
        text = string.sub(text, 1, -2)
    end
    if string.sub(text, -1, -1) == ")" then
        text = string.sub(text, 1, -2)
    end
    return text
end

--- Finds each page name in the query response and works out which part of
-- the response belongs to it.
-- @param full_query the full query response string
-- @param page_names sequence of trimmed page names
-- @param report     function(message) used to record a diagnostic
-- @return sequence of { name, first, last, segment_end } tables ordered by
--         `first`; `segment_end` is the position just before the next page
--         name, or the end of the string for the last one
local function locate_page_names(full_query, page_names, report)
    local found = {}
    for _, page_name in ipairs(page_names) do
        -- An empty name (e.g. produced by a trailing "#") would "match" at
        -- position 1 with zero length, so it is ignored.
        if page_name ~= "" then
            -- Plain search (4th argument): page names are literal text, and
            -- characters such as "-", "(" or "." must not act as pattern magic.
            -- NOTE(review): only the first occurrence is used, so a name that
            -- also appears earlier in the response is located there.
            local first, last = string.find(full_query, page_name, 1, true)
            if first then
                found[#found + 1] = { name = page_name, first = first, last = last }
            else
                report("Page name \"" .. page_name .. "\" was not found.")
            end
        end
    end

    -- Order the entries left to right as they occur in the response.
    table.sort(found, function(a, b) return a.first < b.first end)

    for i, entry in ipairs(found) do
        local following = found[i + 1]
        entry.segment_end = following and (following.first - 1) or #full_query
    end
    return found
end

--- Finds which of PROPERTY_NAMES occur in a property list.
-- @param property_list property list string of one page
-- @return sequence of { property name, start position, end position }, in the
--         order of PROPERTY_NAMES
local function locate_properties(property_list)
    local found = {}
    for _, property_name in ipairs(PROPERTY_NAMES) do
        local first, last = string.find(property_list, property_name, 1, true)
        if first then
            found[#found + 1] = { property_name, first, last }
        end
    end
    return found
end

--- Splits an ask-query response into per-page property lists and returns an
-- HTML dump of the intermediate results.
-- @param frame frame object; reads frame.args["pages_only"] (page names
--              separated by "#") and frame.args["full_query"] (the response
--              string containing those page names, each followed by its
--              property list). A missing argument is treated as "".
-- @return HTML string made of several <ul> lists; if any problem was recorded
--         a final "Diagnostics:" list is appended
function p.processData(frame)
    local pages_only_arg = frame.args["pages_only"] or ""
    local full_query_arg = frame.args["full_query"] or ""

    -- Problems are collected here and shown at the end of the output.
    -- Source line numbers are not included: debug.getinfo is not available
    -- in the Scribunto sandbox.
    local diagnostics = {}
    local function report(message)
        diagnostics[#diagnostics + 1] = message
    end

    local page_names_array = mw.text.split(pages_only_arg, "#")
    local page_name_items = {}
    for i = 1, #page_names_array do
        page_names_array[i] = mw.text.trim(page_names_array[i])
        page_name_items[i] = i .. ": " .. page_names_array[i]
    end

    local located = locate_page_names(full_query_arg, page_names_array, report)

    -- Check that the segments together cover the entire response string.
    local covered = 0
    for _, entry in ipairs(located) do
        covered = covered + entry.segment_end - entry.first + 1
    end
    if covered ~= #full_query_arg then
        report("Some part of the query response string was not associated with a page name when the string was partitioned.")
    end

    -- Group each page name with its property list. The whole segment after
    -- the name is taken; strip_list_wrapping removes the separator, so no
    -- character of the last list is cut off.
    local page_names_and_property_lists = {}
    local page_list_items = {}
    for i, entry in ipairs(located) do
        local raw = string.sub(full_query_arg, entry.last + 1, entry.segment_end)
        local property_list = strip_list_wrapping(raw)
        page_names_and_property_lists[i] = { entry.name, property_list }
        page_list_items[i] = i .. ": {" .. entry.name .. ", " .. property_list .. "}"
    end

    -- Only the first page's property list is analysed for property names.
    local property_section
    local first_page = page_names_and_property_lists[1]
    if first_page then
        local property_items = {}
        for i, v in ipairs(locate_properties(first_page[2])) do
            property_items[i] = i .. ": {" .. v[1] .. ", " .. v[2] .. "}"
        end
        property_section = html_list("For " .. first_page[1] .. ": ", property_items)
    else
        property_section = html_list("No page names were found in the query response.", {})
    end

    local sections = {
        html_list(pages_only_arg, {}),
        html_list("Elements of page_names_array:", page_name_items),
        html_list(full_query_arg, {}),
        html_list("Elements of page_names_and_property_lists after removing parentheses", page_list_items),
        property_section,
        html_list("Newspaper Item Number: n47", {}),
    }
    if #diagnostics > 0 then
        sections[#sections + 1] = html_list("Diagnostics:", diagnostics)
    end
    return table.concat(sections)
end
-- Return the module table `p`, which holds processData, to whoever loads this chunk.
return p