Home | History | Annotate | Line # | Download | only in sets
fmt-list revision 1.6
      1  1.1  rillig #! /usr/bin/lua
      2  1.6  rillig -- $NetBSD: fmt-list,v 1.6 2022/09/08 05:05:08 rillig Exp $
      3  1.1  rillig 
      4  1.1  rillig --[[
      5  1.1  rillig 
      6  1.1  rillig Align the lines of a file list so that all lines from the same directory
      7  1.1  rillig have the other fields at the same indentation.
      8  1.1  rillig 
      9  1.1  rillig Sort the lines and remove duplicate lines.
     10  1.1  rillig 
     11  1.2  rillig usage: ./fmt-list [-n] */*/{mi,ad.*,md.*}
     12  1.1  rillig 
     13  1.1  rillig ]]
     14  1.1  rillig 
     15  1.1  rillig local function test(func)
     16  1.1  rillig   func()
     17  1.1  rillig end
     18  1.1  rillig 
     19  1.1  rillig local function assert_equals(got, expected)
     20  1.1  rillig   if got ~= expected then
     21  1.6  rillig     assert(false, ("got %q, expected %q"):format(got, expected))
     22  1.1  rillig   end
     23  1.1  rillig end
     24  1.1  rillig 
     25  1.1  rillig 
     26  1.1  rillig -- Calculate the width of the given string on the screen, assuming that
     27  1.1  rillig -- the tab width is 8 and that the string starts at a tabstop.
     28  1.1  rillig local function tabwidth(str)
     29  1.1  rillig   local width = 0
     30  1.1  rillig   for i = 1, #str do
     31  1.6  rillig     if str:sub(i, i) == "\t" then
     32  1.1  rillig       width = width // 8 * 8 + 8
     33  1.1  rillig     else
     34  1.1  rillig       width = width + 1
     35  1.1  rillig     end
     36  1.1  rillig   end
     37  1.1  rillig   return width
     38  1.1  rillig end
     39  1.1  rillig 
     40  1.1  rillig test(function()
     41  1.1  rillig   assert_equals(tabwidth(""), 0)
     42  1.1  rillig   assert_equals(tabwidth("1234"), 4)
     43  1.1  rillig   assert_equals(tabwidth("\t"), 8)
     44  1.1  rillig   assert_equals(tabwidth("1234567\t"), 8)
     45  1.1  rillig   assert_equals(tabwidth("\t1234\t"), 16)
     46  1.1  rillig   assert_equals(tabwidth("\t1234\t1"), 17)
     47  1.1  rillig end)
     48  1.1  rillig 
     49  1.1  rillig 
     50  1.1  rillig -- Calculate the tab characters that are necessary to set the width
     51  1.1  rillig -- of the string to the desired width.
     52  1.1  rillig local function tabs(str, width)
     53  1.1  rillig   local strwidth = tabwidth(str)
     54  1.6  rillig   local tabs = ("\t"):rep((width - strwidth + 7) // 8)
     55  1.1  rillig   if tabs == "" then
     56  1.6  rillig     error(("%q\t%d\t%d"):format(str, strwidth, width))
     57  1.1  rillig   end
     58  1.1  rillig   assert(tabs ~= "")
     59  1.1  rillig   return tabs
     60  1.1  rillig end
     61  1.1  rillig 
     62  1.1  rillig test(function()
     63  1.1  rillig   assert_equals(tabs("", 8), "\t")
     64  1.1  rillig   assert_equals(tabs("1234567", 8), "\t")
     65  1.1  rillig   assert_equals(tabs("", 64), "\t\t\t\t\t\t\t\t")
     66  1.1  rillig end)
     67  1.1  rillig 
     68  1.1  rillig 
     69  1.1  rillig -- Group the items by a key and then execute the action on each of the
     70  1.1  rillig -- groups.
     71  1.1  rillig local function foreach_group(items, get_key, action)
     72  1.1  rillig   local key
     73  1.1  rillig   local group = {}
     74  1.1  rillig   for _, item in ipairs(items) do
     75  1.1  rillig     local item_key = assert(get_key(item))
     76  1.1  rillig     if item_key ~= key then
     77  1.1  rillig       if #group > 0 then action(group, key) end
     78  1.1  rillig       key = item_key
     79  1.1  rillig       group = {}
     80  1.1  rillig     end
     81  1.1  rillig     table.insert(group, item)
     82  1.1  rillig   end
     83  1.1  rillig   if #group > 0 then action(group, key) end
     84  1.1  rillig end
     85  1.1  rillig 
     86  1.1  rillig test(function()
     87  1.1  rillig   local items = {
     88  1.1  rillig     {"prime", 2},
     89  1.1  rillig     {"prime", 3},
     90  1.1  rillig     {"not prime", 4},
     91  1.1  rillig     {"prime", 5},
     92  1.1  rillig     {"prime", 7}
     93  1.1  rillig   }
     94  1.1  rillig   local result = ""
     95  1.1  rillig   foreach_group(
     96  1.1  rillig     items,
     97  1.1  rillig     function(item) return item[1] end,
     98  1.1  rillig     function(group, key)
     99  1.6  rillig       result = result .. ("%d %s\n"):format(#group, key)
    100  1.1  rillig     end)
    101  1.1  rillig   assert_equals(result, "2 prime\n1 not prime\n2 prime\n")
    102  1.1  rillig end)
    103  1.1  rillig 
    104  1.1  rillig 
    105  1.1  rillig -- Parse a line from a file list and split it into its meaningful parts.
    106  1.1  rillig local function parse_entry(line)
    107  1.1  rillig 
    108  1.1  rillig   local category_align, prefix, fullname, flags_align, category, flags =
    109  1.4  rillig     line:match("^(([#%-]?)(%.%S*)%s+)((%S+)%s+)(%S+)$")
    110  1.1  rillig   if fullname == nil then
    111  1.1  rillig     category_align, prefix, fullname, category =
    112  1.4  rillig       line:match("^(([#%-]?)(%.%S*)%s+)(%S+)$")
    113  1.1  rillig   end
    114  1.1  rillig   if fullname == nil then
    115  1.1  rillig     prefix, fullname = line:match("^(%-)(%.%S*)$")
    116  1.1  rillig   end
    117  1.1  rillig   if fullname == nil then
    118  1.1  rillig     return
    119  1.1  rillig   end
    120  1.1  rillig 
    121  1.1  rillig   local dirname, basename = fullname:match("^(.+)/([^/]+)$")
    122  1.1  rillig   if dirname == nil then
    123  1.1  rillig     dirname, basename = "", fullname
    124  1.1  rillig   end
    125  1.1  rillig 
    126  1.1  rillig   local category_col, flags_col
    127  1.1  rillig   if category_align ~= nil then
    128  1.1  rillig     category_col = tabwidth(category_align)
    129  1.1  rillig   end
    130  1.1  rillig   if flags_align ~= nil then
    131  1.1  rillig     flags_col = tabwidth(flags_align)
    132  1.1  rillig   end
    133  1.1  rillig 
    134  1.1  rillig   return {
    135  1.1  rillig     prefix = prefix,
    136  1.1  rillig     fullname = fullname,
    137  1.1  rillig     dirname = dirname,
    138  1.1  rillig     basename = basename,
    139  1.1  rillig     category_col = category_col,
    140  1.1  rillig     category = category,
    141  1.1  rillig     flags_col = flags_col,
    142  1.1  rillig     flags = flags
    143  1.1  rillig   }
    144  1.1  rillig end
    145  1.1  rillig 
    146  1.1  rillig test(function()
    147  1.1  rillig   local entry = parse_entry("./dirname/filename\t\t\tcategory\tflags")
    148  1.1  rillig   assert_equals(entry.prefix, "")
    149  1.1  rillig   assert_equals(entry.fullname, "./dirname/filename")
    150  1.1  rillig   assert_equals(entry.dirname, "./dirname")
    151  1.1  rillig   assert_equals(entry.basename, "filename")
    152  1.1  rillig   assert_equals(entry.category_col, 40)
    153  1.1  rillig   assert_equals(entry.category, "category")
    154  1.1  rillig   assert_equals(entry.flags_col, 16)
    155  1.1  rillig   assert_equals(entry.flags, "flags")
    156  1.4  rillig 
    157  1.4  rillig   entry = parse_entry("#./dirname/filename\tcat\tflags")
    158  1.4  rillig   assert_equals(entry.prefix, "#")
    159  1.4  rillig   assert_equals(entry.fullname, "./dirname/filename")
    160  1.4  rillig   assert_equals(entry.dirname, "./dirname")
    161  1.4  rillig   assert_equals(entry.basename, "filename")
    162  1.4  rillig   assert_equals(entry.category_col, 24)
    163  1.4  rillig   assert_equals(entry.category, "cat")
    164  1.4  rillig   assert_equals(entry.flags_col, 8)
    165  1.4  rillig   assert_equals(entry.flags, "flags")
    166  1.1  rillig end)
    167  1.1  rillig 
    168  1.1  rillig 
    169  1.1  rillig -- Return the smaller of the given values, ignoring nil.
    170  1.1  rillig local function min(curr, value)
    171  1.1  rillig   if curr == nil or (value ~= nil and value < curr) then
    172  1.1  rillig     return value
    173  1.1  rillig   end
    174  1.1  rillig   return curr
    175  1.1  rillig end
    176  1.1  rillig 
    177  1.1  rillig test(function()
    178  1.1  rillig   assert_equals(min(nil, nil), nil)
    179  1.1  rillig   assert_equals(min(0, nil), 0)
    180  1.1  rillig   assert_equals(min(nil, 0), 0)
    181  1.1  rillig   assert_equals(min(0, 0), 0)
    182  1.1  rillig   assert_equals(min(1, -1), -1)
    183  1.1  rillig   assert_equals(min(-1, 1), -1)
    184  1.1  rillig end)
    185  1.1  rillig 
    186  1.1  rillig 
    187  1.1  rillig -- Return the larger of the given values, ignoring nil.
    188  1.1  rillig local function max(curr, value)
    189  1.1  rillig   if curr == nil or (value ~= nil and value > curr) then
    190  1.1  rillig     return value
    191  1.1  rillig   end
    192  1.1  rillig   return curr
    193  1.1  rillig end
    194  1.1  rillig 
    195  1.1  rillig test(function()
    196  1.1  rillig   assert_equals(max(nil, nil), nil)
    197  1.1  rillig   assert_equals(max(0, nil), 0)
    198  1.1  rillig   assert_equals(max(nil, 0), 0)
    199  1.1  rillig   assert_equals(max(0, 0), 0)
    200  1.1  rillig   assert_equals(max(1, -1), 1)
    201  1.1  rillig   assert_equals(max(-1, 1), 1)
    202  1.1  rillig end)
    203  1.1  rillig 
    204  1.1  rillig 
    205  1.1  rillig -- Calculate the column on which the field should be aligned.
    206  1.1  rillig local function column(entries, get_width_before, colname)
    207  1.1  rillig 
    208  1.1  rillig   local function nexttab(col)
    209  1.1  rillig     return col // 8 * 8 + 8
    210  1.1  rillig   end
    211  1.1  rillig 
    212  1.1  rillig   local currmin, currmax, required
    213  1.1  rillig 
    214  1.1  rillig   for _, entry in ipairs(entries) do
    215  1.1  rillig     local width = get_width_before(entry)
    216  1.1  rillig     if width ~= nil then
    217  1.1  rillig       required = max(required, width)
    218  1.1  rillig 
    219  1.1  rillig       local col = entry[colname]
    220  1.1  rillig       currmin = min(currmin, col)
    221  1.1  rillig       currmax = max(currmax, col)
    222  1.1  rillig     end
    223  1.1  rillig   end
    224  1.1  rillig 
    225  1.1  rillig   if currmin == currmax then
    226  1.1  rillig     return currmin, "aligned"
    227  1.1  rillig   end
    228  1.1  rillig   return nexttab(required), "unaligned"
    229  1.1  rillig end
    230  1.1  rillig 
    231  1.1  rillig test(function()
    232  1.1  rillig 
    233  1.1  rillig   local function width_before_category(entry)
    234  1.1  rillig     return tabwidth(entry.prefix .. entry.fullname)
    235  1.1  rillig   end
    236  1.1  rillig 
    237  1.1  rillig   local function width_before_flags(entry)
    238  1.1  rillig     return tabwidth(entry.category)
    239  1.1  rillig   end
    240  1.1  rillig 
    241  1.1  rillig   -- The entries are nicely aligned, therefore there is no need to change
    242  1.1  rillig   -- anything.
    243  1.1  rillig   local entries = {
    244  1.1  rillig     parse_entry("./file1\tcategory"),
    245  1.1  rillig     parse_entry("./file2\tcategory")
    246  1.1  rillig   }
    247  1.1  rillig   assert_equals(entries[2].category_col, 8)
    248  1.1  rillig   assert_equals(width_before_category(entries[2]), 7)
    249  1.1  rillig   assert_equals(column(entries, width_before_category, "category_col"), 8)
    250  1.1  rillig 
    251  1.1  rillig   -- The entries are currently not aligned, therefore they are aligned
    252  1.1  rillig   -- to the minimum required column.
    253  1.1  rillig   entries = {
    254  1.1  rillig     parse_entry("./file1\tcategory"),
    255  1.1  rillig     parse_entry("./directory/file2\tcategory"),
    256  1.1  rillig   }
    257  1.1  rillig   assert_equals(entries[2].category_col, 24)
    258  1.1  rillig   assert_equals(column(entries, width_before_category, "category_col"), 24)
    259  1.1  rillig 
    260  1.1  rillig   -- The entries are already aligned, therefore the current alignment is
    261  1.1  rillig   -- preserved, even though it is more than the minimum required alignment
    262  1.1  rillig   -- of 8.  There are probably reasons for the large indentation.
    263  1.1  rillig   entries = {
    264  1.1  rillig     parse_entry("./file1\t\t\tcategory"),
    265  1.1  rillig     parse_entry("./file2\t\t\tcategory")
    266  1.1  rillig   }
    267  1.1  rillig   assert_equals(column(entries, width_before_category, "category_col"), 24)
    268  1.1  rillig 
    269  1.1  rillig   -- The flags are already aligned, 4 tabs to the right of the category.
    270  1.1  rillig   -- There is no reason to change anything here.
    271  1.1  rillig   entries = {
    272  1.1  rillig     parse_entry("./file1\tcategory\t\t\tflags"),
    273  1.1  rillig     parse_entry("./file2\tcategory"),
    274  1.1  rillig     parse_entry("./file3\tcat\t\t\t\tflags")
    275  1.1  rillig   }
    276  1.1  rillig   assert_equals(column(entries, width_before_flags, "flags_col"), 32)
    277  1.1  rillig 
    278  1.1  rillig end)
    279  1.1  rillig 
    280  1.1  rillig 
    281  1.1  rillig -- Amend the entries by the tabs used for alignment.
    282  1.1  rillig local function add_tabs(entries)
    283  1.1  rillig 
    284  1.1  rillig   local function width_before_category(entry)
    285  1.1  rillig     return tabwidth(entry.prefix .. entry.fullname)
    286  1.1  rillig   end
    287  1.1  rillig   local function width_before_flags(entry)
    288  1.1  rillig     if entry.flags ~= nil then
    289  1.1  rillig       return tabwidth(entry.category)
    290  1.1  rillig     end
    291  1.1  rillig   end
    292  1.1  rillig 
    293  1.1  rillig   local category_col, category_aligned =
    294  1.1  rillig     column(entries, width_before_category, "category_col")
    295  1.1  rillig   local flags_col = column(entries, width_before_flags, "flags_col")
    296  1.1  rillig 
    297  1.6  rillig   -- To avoid horizontal jumps for the category column, the minimum column is
    298  1.6  rillig   -- set to 56.  This way, the third column is usually set to 72, which is
    299  1.1  rillig   -- still visible on an 80-column screen.
    300  1.1  rillig   if category_aligned == "unaligned" then
    301  1.1  rillig     category_col = max(category_col, 56)
    302  1.1  rillig   end
    303  1.1  rillig 
    304  1.1  rillig   for _, entry in ipairs(entries) do
    305  1.1  rillig     local prefix = entry.prefix
    306  1.1  rillig     local fullname = entry.fullname
    307  1.1  rillig     local category = entry.category
    308  1.1  rillig     local flags = entry.flags
    309  1.1  rillig 
    310  1.1  rillig     if category ~= nil then
    311  1.1  rillig       entry.category_tabs = tabs(prefix .. fullname, category_col)
    312  1.1  rillig       if flags ~= nil then
    313  1.1  rillig         entry.flags_tabs = tabs(category, flags_col)
    314  1.1  rillig       end
    315  1.1  rillig     end
    316  1.1  rillig   end
    317  1.1  rillig end
    318  1.1  rillig 
    319  1.1  rillig test(function()
    320  1.1  rillig   local entries = {
    321  1.1  rillig     parse_entry("./file1\t\t\t\tcategory\t\tflags"),
    322  1.1  rillig     parse_entry("./file2\t\t\t\tcategory\t\tflags"),
    323  1.1  rillig     parse_entry("./file3\t\t\tcategory\t\tflags")
    324  1.1  rillig   }
    325  1.1  rillig   add_tabs(entries)
    326  1.1  rillig   assert_equals(entries[1].category_tabs, "\t\t\t\t\t\t\t")
    327  1.1  rillig   assert_equals(entries[2].category_tabs, "\t\t\t\t\t\t\t")
    328  1.1  rillig   assert_equals(entries[3].category_tabs, "\t\t\t\t\t\t\t")
    329  1.1  rillig   assert_equals(entries[1].flags_tabs, "\t\t")
    330  1.1  rillig   assert_equals(entries[2].flags_tabs, "\t\t")
    331  1.1  rillig   assert_equals(entries[3].flags_tabs, "\t\t")
    332  1.1  rillig end)
    333  1.1  rillig 
    334  1.1  rillig 
    335  1.1  rillig -- Normalize the alignment of the fields of the entries.
    336  1.1  rillig local function normalize(entries)
    337  1.1  rillig 
    338  1.1  rillig   local function less(a, b)
    339  1.1  rillig     if a.fullname ~= b.fullname then
    340  1.5  rillig       -- To sort by directory first, comment out the following line.
    341  1.1  rillig       return a.fullname < b.fullname
    342  1.1  rillig     end
    343  1.5  rillig     if a.dirname ~= b.dirname then
    344  1.5  rillig       return a.dirname < b.dirname
    345  1.5  rillig     end
    346  1.5  rillig     if a.basename ~= b.basename then
    347  1.5  rillig       return a.basename < b.basename
    348  1.5  rillig     end
    349  1.1  rillig     if a.category ~= nil and b.category ~= nil and a.category ~= b.category then
    350  1.1  rillig       return a.category < b.category
    351  1.1  rillig     end
    352  1.1  rillig     return a.flags ~= nil and b.flags ~= nil and a.flags < b.flags
    353  1.1  rillig   end
    354  1.1  rillig   table.sort(entries, less)
    355  1.1  rillig 
    356  1.1  rillig   local function by_dirname(entry)
    357  1.1  rillig     return entry.dirname
    358  1.1  rillig   end
    359  1.1  rillig   foreach_group(entries, by_dirname, add_tabs)
    360  1.1  rillig 
    361  1.1  rillig end
    362  1.1  rillig 
    363  1.1  rillig 
    364  1.1  rillig -- Read a file list completely into memory.
    365  1.1  rillig local function read_list(fname)
    366  1.1  rillig   local head = {}
    367  1.1  rillig   local entries = {}
    368  1.1  rillig   local errors = {}
    369  1.1  rillig 
    370  1.1  rillig   local f = assert(io.open(fname, "r"))
    371  1.1  rillig   local lineno = 0
    372  1.1  rillig   for line in f:lines() do
    373  1.1  rillig     lineno = lineno + 1
    374  1.1  rillig 
    375  1.1  rillig     local entry = parse_entry(line)
    376  1.1  rillig     if entry ~= nil then
    377  1.1  rillig       table.insert(entries, entry)
    378  1.1  rillig     elseif line:match("^#") then
    379  1.1  rillig       table.insert(head, line)
    380  1.1  rillig     else
    381  1.6  rillig       local msg = ("%s:%d: unknown line format %q"):format(fname, lineno, line)
    382  1.1  rillig       table.insert(errors, msg)
    383  1.1  rillig     end
    384  1.1  rillig   end
    385  1.1  rillig 
    386  1.1  rillig   f:close()
    387  1.1  rillig 
    388  1.1  rillig   return head, entries, errors
    389  1.1  rillig end
    390  1.1  rillig 
    391  1.1  rillig 
    392  1.1  rillig -- Write the normalized list file back to disk.
    393  1.1  rillig --
    394  1.1  rillig -- Duplicate lines are skipped.  This allows to append arbitrary lines to
    395  1.1  rillig -- the end of the file and have them cleaned up automatically.
    396  1.1  rillig local function write_list(fname, head, entries)
    397  1.1  rillig   local f = assert(io.open(fname, "w"))
    398  1.1  rillig 
    399  1.1  rillig   for _, line in ipairs(head) do
    400  1.1  rillig     f:write(line, "\n")
    401  1.1  rillig   end
    402  1.1  rillig 
    403  1.1  rillig   local prev_line = ""
    404  1.1  rillig   for _, entry in ipairs(entries) do
    405  1.1  rillig     local line = entry.prefix .. entry.fullname
    406  1.1  rillig     if entry.category ~= nil then
    407  1.1  rillig       line = line .. entry.category_tabs .. entry.category
    408  1.1  rillig     end
    409  1.1  rillig     if entry.flags ~= nil then
    410  1.1  rillig       line = line .. entry.flags_tabs .. entry.flags
    411  1.1  rillig     end
    412  1.1  rillig 
    413  1.1  rillig     if line ~= prev_line then
    414  1.1  rillig       prev_line = line
    415  1.1  rillig       f:write(line, "\n")
    416  1.4  rillig     else
    417  1.6  rillig       --print(("%s: duplicate entry: %s"):format(fname, line))
    418  1.1  rillig     end
    419  1.1  rillig   end
    420  1.1  rillig 
    421  1.1  rillig   f:close()
    422  1.1  rillig end
    423  1.1  rillig 
    424  1.1  rillig 
    425  1.1  rillig -- Load a file list, normalize it and write it back to disk.
    426  1.2  rillig local function format_list(fname, write_back)
    427  1.1  rillig   local head, entries, errors = read_list(fname)
    428  1.1  rillig   if #errors > 0 then
    429  1.1  rillig     for _, err in ipairs(errors) do
    430  1.1  rillig       print(err)
    431  1.1  rillig     end
    432  1.3  rillig     return false
    433  1.1  rillig   end
    434  1.1  rillig 
    435  1.1  rillig   normalize(entries)
    436  1.2  rillig 
    437  1.2  rillig   if write_back then
    438  1.2  rillig     write_list(fname, head, entries)
    439  1.2  rillig   end
    440  1.3  rillig   return true
    441  1.1  rillig end
    442  1.1  rillig 
    443  1.1  rillig 
    444  1.2  rillig local function main(arg)
    445  1.3  rillig   local seen_error = false
    446  1.2  rillig   local write_back = true
    447  1.2  rillig   for _, fname in ipairs(arg) do
    448  1.2  rillig     if fname == "-n" then
    449  1.2  rillig       write_back = false
    450  1.2  rillig     else
    451  1.3  rillig       if not format_list(fname, write_back) then
    452  1.3  rillig         seen_error = true
    453  1.3  rillig       end
    454  1.2  rillig     end
    455  1.2  rillig   end
    456  1.3  rillig   return not seen_error
    457  1.1  rillig end
    458  1.2  rillig 
    459  1.3  rillig os.exit(main(arg))
    460