Module:Process ask query results 1: Difference between revisions

From Livermore History Collaborative
Jump to navigation Jump to search
No edit summary
No edit summary
Line 149: Line 149:
     test_string = string.sub(page_names_and_property_lists[1][2], property_names_and_positions[9][2], property_names_and_positions[9][3])
     test_string = string.sub(page_names_and_property_lists[1][2], property_names_and_positions[9][2], property_names_and_positions[9][3])
     test_string_2 = string.sub(page_names_and_property_lists[1][2], 360, 400)
     test_string_2 = string.sub(page_names_and_property_lists[1][2], 360, 400)
     test_string_3 = string.sub(page_names_and_property_lists[1][2], 401, 500)
     test_string_3 = string.sub(page_names_and_property_lists[1][2], 450, 600)





Revision as of 23:41, 8 December 2025

Documentation for this module may be created at Module:Process ask query results 1/doc

local p = {}

-- Property labels searched for inside a page's property list, in the order
-- in which they are reported.
local PROPERTY_NAMES = {
    "Newspaper Item Number",
    "Newspaper Title",
    "Newspaper Page",
    "Newspaper Column",
    "Publication Date",
    "Newspaper Full Text",
    "Newspaper Excerpt",
    "Newspaper Summary",
    "Newspaper Online Source",
    "Mentions",
}

--- Render a sequence of strings as one HTML unordered list.
-- @param items sequence of strings; each becomes the content of one <li>
-- @return the string "<ul><li>…</li>…</ul>"
local function html_list(items)
    local buf = { "<ul>" }
    for _, item in ipairs(items) do
        buf[#buf + 1] = "<li>" .. item .. "</li>"
    end
    buf[#buf + 1] = "</ul>"
    return table.concat(buf)
end

--- Find the first occurrence of each needle in a string.
-- The search is a plain-text search (4th argument of string.find), so
-- characters such as "-", "(", "." or "%" in a needle are matched literally
-- instead of being interpreted as Lua pattern syntax.
-- Only the FIRST occurrence of each needle is reported; a needle that is a
-- substring of another needle may therefore be found inside the longer one.
-- @param haystack string to search in
-- @param needles sequence of strings to look for; empty strings are skipped
--   because they would "match" at position 1 with a zero-length range
-- @return sequence of {needle, start_pos, end_pos}, in the order of `needles`,
--   containing only the needles that were found
local function locate_all(haystack, needles)
    local found = {}
    for _, needle in ipairs(needles) do
        if needle ~= "" then
            local first, last = string.find(haystack, needle, 1, true)
            if first ~= nil then
                found[#found + 1] = { needle, first, last }
            end
        end
    end
    return found
end

--- Remove the wrapping around one property list.
-- Strips surrounding whitespace, then one leading "(", then one trailing ","
-- and finally one trailing ")".
-- NOTE(review): assumes each list looks like " (…), " in the query response,
-- as the original comments describe — confirm against real query output.
-- @param raw the text between one page name and the next
-- @return the text with the wrapping removed
local function strip_wrapping(raw)
    -- Parentheses keep only gsub's first return value (the string).
    local text = (string.gsub(raw, "^%s*(.-)%s*$", "%1"))
    if string.sub(text, 1, 1) == "(" then
        text = string.sub(text, 2)
    end
    if string.sub(text, -1, -1) == "," then
        text = string.sub(text, 1, -2)
    end
    if string.sub(text, -1, -1) == ")" then
        text = string.sub(text, 1, -2)
    end
    return text
end

--- Split an ask-query response into per-page property lists and return an
-- HTML report of the intermediate results (diagnostic output).
--
-- Reads three template arguments from frame.args:
--   pages_only : page names separated by "#"
--   full_query : the full query response text, in which each page name is
--                followed by its property list
--   extra      : free text echoed at the end of the report
-- A missing argument is treated as the empty string.
--
-- @param frame Scribunto frame object
-- @return string of HTML lists describing the parsed data
function p.processData(frame)
    local args = frame.args
    local pages_only_arg = args["pages_only"] or ""
    local full_query_arg = args["full_query"] or ""
    local extra_arg = args["extra"] or ""

    -- Page names, trimmed of surrounding whitespace.
    local page_names_array = mw.text.split(pages_only_arg, "#", true)
    for i = 1, #page_names_array do
        page_names_array[i] = mw.text.trim(page_names_array[i])
    end

    local page_name_items = { "Elements of page_names_array:" }
    for i, page_name in ipairs(page_names_array) do
        page_name_items[#page_name_items + 1] = i .. ": " .. page_name
    end

    -- Entries of the form {page name, start pos, end pos}, sorted by the
    -- position at which the page name occurs in the query response.
    local page_names_and_positions = locate_all(full_query_arg, page_names_array)
    table.sort(page_names_and_positions, function(a, b) return a[2] < b[2] end)

    -- The text belonging to a page runs from just after its name up to the
    -- character before the next page name, or to the end of the response for
    -- the last page. The end bound is inclusive, so the final character of
    -- the response is no longer dropped; strip_wrapping removes the
    -- separator characters afterwards.
    local page_names_and_property_lists = {}
    for i, entry in ipairs(page_names_and_positions) do
        local following = page_names_and_positions[i + 1]
        local segment_end = following and (following[2] - 1) or #full_query_arg
        local raw_list = string.sub(full_query_arg, entry[3] + 1, segment_end)
        page_names_and_property_lists[i] = { entry[1], strip_wrapping(raw_list) }
    end

    local property_list_items = {
        "Elements of page_names_and_property_lists after removing parentheses",
    }
    for i, pair in ipairs(page_names_and_property_lists) do
        property_list_items[#property_list_items + 1] =
            i .. ": {" .. pair[1] .. ", " .. pair[2] .. "}"
    end

    -- The rest of the report only looks at the first page. When no page name
    -- was found at all, fall back to empty strings instead of raising an error.
    local first_pair = page_names_and_property_lists[1]
    local first_name = first_pair and first_pair[1] or ""
    local first_list = first_pair and first_pair[2] or ""

    local property_names_and_positions = locate_all(first_list, PROPERTY_NAMES)

    local property_items = { "For " .. first_name .. ": " }
    for i, entry in ipairs(property_names_and_positions) do
        property_items[#property_items + 1] =
            i .. ": " .. "{" .. entry[1] .. ", " .. entry[2] .. ", " .. entry[3] .. "}"
    end

    -- Diagnostic excerpts. Entry 9 is the ninth property that was FOUND,
    -- which is the ninth property name only if all earlier ones were present;
    -- it may not exist at all, in which case the excerpt is empty.
    local ninth = property_names_and_positions[9]
    local test_string = ninth and string.sub(first_list, ninth[2], ninth[3]) or ""
    local test_string_2 = string.sub(first_list, 360, 400)
    local test_string_3 = string.sub(first_list, 450, 600)

    return table.concat({
        html_list({ pages_only_arg }),
        html_list(page_name_items),
        html_list({ full_query_arg }),
        html_list(property_list_items),
        html_list({
            first_list,
            "Test string: " .. test_string,
            "Test string 2: " .. test_string_2,
            "Test string 3: " .. test_string_3,
        }),
        html_list(property_items),
        html_list({
            "Newspaper Item Number: n47",
            "Extra Argument: " .. extra_arg,
        }),
    })
end

return p