Home | History | Annotate | Line # | Download | only in sets
      1 #! /usr/bin/lua
      2 -- $NetBSD: fmt-list,v 1.6 2022/09/08 05:05:08 rillig Exp $
      3 
      4 --[[
      5 
      6 Align the lines of a file list so that all lines from the same directory
      7 have the other fields at the same indentation.
      8 
      9 Sort the lines and remove duplicate lines.
     10 
     11 usage: ./fmt-list [-n] */*/{mi,ad.*,md.*}
     12 
     13 ]]
     14 
     15 local function test(func)
     16   func()
     17 end
     18 
     19 local function assert_equals(got, expected)
     20   if got ~= expected then
     21     assert(false, ("got %q, expected %q"):format(got, expected))
     22   end
     23 end
     24 
     25 
     26 -- Calculate the width of the given string on the screen, assuming that
     27 -- the tab width is 8 and that the string starts at a tabstop.
     28 local function tabwidth(str)
     29   local width = 0
     30   for i = 1, #str do
     31     if str:sub(i, i) == "\t" then
     32       width = width // 8 * 8 + 8
     33     else
     34       width = width + 1
     35     end
     36   end
     37   return width
     38 end
     39 
     40 test(function()
     41   assert_equals(tabwidth(""), 0)
     42   assert_equals(tabwidth("1234"), 4)
     43   assert_equals(tabwidth("\t"), 8)
     44   assert_equals(tabwidth("1234567\t"), 8)
     45   assert_equals(tabwidth("\t1234\t"), 16)
     46   assert_equals(tabwidth("\t1234\t1"), 17)
     47 end)
     48 
     49 
     50 -- Calculate the tab characters that are necessary to set the width
     51 -- of the string to the desired width.
     52 local function tabs(str, width)
     53   local strwidth = tabwidth(str)
     54   local tabs = ("\t"):rep((width - strwidth + 7) // 8)
     55   if tabs == "" then
     56     error(("%q\t%d\t%d"):format(str, strwidth, width))
     57   end
     58   assert(tabs ~= "")
     59   return tabs
     60 end
     61 
     62 test(function()
     63   assert_equals(tabs("", 8), "\t")
     64   assert_equals(tabs("1234567", 8), "\t")
     65   assert_equals(tabs("", 64), "\t\t\t\t\t\t\t\t")
     66 end)
     67 
     68 
     69 -- Group the items by a key and then execute the action on each of the
     70 -- groups.
     71 local function foreach_group(items, get_key, action)
     72   local key
     73   local group = {}
     74   for _, item in ipairs(items) do
     75     local item_key = assert(get_key(item))
     76     if item_key ~= key then
     77       if #group > 0 then action(group, key) end
     78       key = item_key
     79       group = {}
     80     end
     81     table.insert(group, item)
     82   end
     83   if #group > 0 then action(group, key) end
     84 end
     85 
     86 test(function()
     87   local items = {
     88     {"prime", 2},
     89     {"prime", 3},
     90     {"not prime", 4},
     91     {"prime", 5},
     92     {"prime", 7}
     93   }
     94   local result = ""
     95   foreach_group(
     96     items,
     97     function(item) return item[1] end,
     98     function(group, key)
     99       result = result .. ("%d %s\n"):format(#group, key)
    100     end)
    101   assert_equals(result, "2 prime\n1 not prime\n2 prime\n")
    102 end)
    103 
    104 
    105 -- Parse a line from a file list and split it into its meaningful parts.
    106 local function parse_entry(line)
    107 
    108   local category_align, prefix, fullname, flags_align, category, flags =
    109     line:match("^(([#%-]?)(%.%S*)%s+)((%S+)%s+)(%S+)$")
    110   if fullname == nil then
    111     category_align, prefix, fullname, category =
    112       line:match("^(([#%-]?)(%.%S*)%s+)(%S+)$")
    113   end
    114   if fullname == nil then
    115     prefix, fullname = line:match("^(%-)(%.%S*)$")
    116   end
    117   if fullname == nil then
    118     return
    119   end
    120 
    121   local dirname, basename = fullname:match("^(.+)/([^/]+)$")
    122   if dirname == nil then
    123     dirname, basename = "", fullname
    124   end
    125 
    126   local category_col, flags_col
    127   if category_align ~= nil then
    128     category_col = tabwidth(category_align)
    129   end
    130   if flags_align ~= nil then
    131     flags_col = tabwidth(flags_align)
    132   end
    133 
    134   return {
    135     prefix = prefix,
    136     fullname = fullname,
    137     dirname = dirname,
    138     basename = basename,
    139     category_col = category_col,
    140     category = category,
    141     flags_col = flags_col,
    142     flags = flags
    143   }
    144 end
    145 
    146 test(function()
    147   local entry = parse_entry("./dirname/filename\t\t\tcategory\tflags")
    148   assert_equals(entry.prefix, "")
    149   assert_equals(entry.fullname, "./dirname/filename")
    150   assert_equals(entry.dirname, "./dirname")
    151   assert_equals(entry.basename, "filename")
    152   assert_equals(entry.category_col, 40)
    153   assert_equals(entry.category, "category")
    154   assert_equals(entry.flags_col, 16)
    155   assert_equals(entry.flags, "flags")
    156 
    157   entry = parse_entry("#./dirname/filename\tcat\tflags")
    158   assert_equals(entry.prefix, "#")
    159   assert_equals(entry.fullname, "./dirname/filename")
    160   assert_equals(entry.dirname, "./dirname")
    161   assert_equals(entry.basename, "filename")
    162   assert_equals(entry.category_col, 24)
    163   assert_equals(entry.category, "cat")
    164   assert_equals(entry.flags_col, 8)
    165   assert_equals(entry.flags, "flags")
    166 end)
    167 
    168 
    169 -- Return the smaller of the given values, ignoring nil.
    170 local function min(curr, value)
    171   if curr == nil or (value ~= nil and value < curr) then
    172     return value
    173   end
    174   return curr
    175 end
    176 
    177 test(function()
    178   assert_equals(min(nil, nil), nil)
    179   assert_equals(min(0, nil), 0)
    180   assert_equals(min(nil, 0), 0)
    181   assert_equals(min(0, 0), 0)
    182   assert_equals(min(1, -1), -1)
    183   assert_equals(min(-1, 1), -1)
    184 end)
    185 
    186 
    187 -- Return the larger of the given values, ignoring nil.
    188 local function max(curr, value)
    189   if curr == nil or (value ~= nil and value > curr) then
    190     return value
    191   end
    192   return curr
    193 end
    194 
    195 test(function()
    196   assert_equals(max(nil, nil), nil)
    197   assert_equals(max(0, nil), 0)
    198   assert_equals(max(nil, 0), 0)
    199   assert_equals(max(0, 0), 0)
    200   assert_equals(max(1, -1), 1)
    201   assert_equals(max(-1, 1), 1)
    202 end)
    203 
    204 
    205 -- Calculate the column on which the field should be aligned.
    206 local function column(entries, get_width_before, colname)
    207 
    208   local function nexttab(col)
    209     return col // 8 * 8 + 8
    210   end
    211 
    212   local currmin, currmax, required
    213 
    214   for _, entry in ipairs(entries) do
    215     local width = get_width_before(entry)
    216     if width ~= nil then
    217       required = max(required, width)
    218 
    219       local col = entry[colname]
    220       currmin = min(currmin, col)
    221       currmax = max(currmax, col)
    222     end
    223   end
    224 
    225   if currmin == currmax then
    226     return currmin, "aligned"
    227   end
    228   return nexttab(required), "unaligned"
    229 end
    230 
    231 test(function()
    232 
    233   local function width_before_category(entry)
    234     return tabwidth(entry.prefix .. entry.fullname)
    235   end
    236 
    237   local function width_before_flags(entry)
    238     return tabwidth(entry.category)
    239   end
    240 
    241   -- The entries are nicely aligned, therefore there is no need to change
    242   -- anything.
    243   local entries = {
    244     parse_entry("./file1\tcategory"),
    245     parse_entry("./file2\tcategory")
    246   }
    247   assert_equals(entries[2].category_col, 8)
    248   assert_equals(width_before_category(entries[2]), 7)
    249   assert_equals(column(entries, width_before_category, "category_col"), 8)
    250 
    251   -- The entries are currently not aligned, therefore they are aligned
    252   -- to the minimum required column.
    253   entries = {
    254     parse_entry("./file1\tcategory"),
    255     parse_entry("./directory/file2\tcategory"),
    256   }
    257   assert_equals(entries[2].category_col, 24)
    258   assert_equals(column(entries, width_before_category, "category_col"), 24)
    259 
    260   -- The entries are already aligned, therefore the current alignment is
    261   -- preserved, even though it is more than the minimum required alignment
    262   -- of 8.  There are probably reasons for the large indentation.
    263   entries = {
    264     parse_entry("./file1\t\t\tcategory"),
    265     parse_entry("./file2\t\t\tcategory")
    266   }
    267   assert_equals(column(entries, width_before_category, "category_col"), 24)
    268 
    269   -- The flags are already aligned, 4 tabs to the right of the category.
    270   -- There is no reason to change anything here.
    271   entries = {
    272     parse_entry("./file1\tcategory\t\t\tflags"),
    273     parse_entry("./file2\tcategory"),
    274     parse_entry("./file3\tcat\t\t\t\tflags")
    275   }
    276   assert_equals(column(entries, width_before_flags, "flags_col"), 32)
    277 
    278 end)
    279 
    280 
    281 -- Amend the entries by the tabs used for alignment.
    282 local function add_tabs(entries)
    283 
    284   local function width_before_category(entry)
    285     return tabwidth(entry.prefix .. entry.fullname)
    286   end
    287   local function width_before_flags(entry)
    288     if entry.flags ~= nil then
    289       return tabwidth(entry.category)
    290     end
    291   end
    292 
    293   local category_col, category_aligned =
    294     column(entries, width_before_category, "category_col")
    295   local flags_col = column(entries, width_before_flags, "flags_col")
    296 
    297   -- To avoid horizontal jumps for the category column, the minimum column is
    298   -- set to 56.  This way, the third column is usually set to 72, which is
    299   -- still visible on an 80-column screen.
    300   if category_aligned == "unaligned" then
    301     category_col = max(category_col, 56)
    302   end
    303 
    304   for _, entry in ipairs(entries) do
    305     local prefix = entry.prefix
    306     local fullname = entry.fullname
    307     local category = entry.category
    308     local flags = entry.flags
    309 
    310     if category ~= nil then
    311       entry.category_tabs = tabs(prefix .. fullname, category_col)
    312       if flags ~= nil then
    313         entry.flags_tabs = tabs(category, flags_col)
    314       end
    315     end
    316   end
    317 end
    318 
    319 test(function()
    320   local entries = {
    321     parse_entry("./file1\t\t\t\tcategory\t\tflags"),
    322     parse_entry("./file2\t\t\t\tcategory\t\tflags"),
    323     parse_entry("./file3\t\t\tcategory\t\tflags")
    324   }
    325   add_tabs(entries)
    326   assert_equals(entries[1].category_tabs, "\t\t\t\t\t\t\t")
    327   assert_equals(entries[2].category_tabs, "\t\t\t\t\t\t\t")
    328   assert_equals(entries[3].category_tabs, "\t\t\t\t\t\t\t")
    329   assert_equals(entries[1].flags_tabs, "\t\t")
    330   assert_equals(entries[2].flags_tabs, "\t\t")
    331   assert_equals(entries[3].flags_tabs, "\t\t")
    332 end)
    333 
    334 
    335 -- Normalize the alignment of the fields of the entries.
    336 local function normalize(entries)
    337 
    338   local function less(a, b)
    339     if a.fullname ~= b.fullname then
    340       -- To sort by directory first, comment out the following line.
    341       return a.fullname < b.fullname
    342     end
    343     if a.dirname ~= b.dirname then
    344       return a.dirname < b.dirname
    345     end
    346     if a.basename ~= b.basename then
    347       return a.basename < b.basename
    348     end
    349     if a.category ~= nil and b.category ~= nil and a.category ~= b.category then
    350       return a.category < b.category
    351     end
    352     return a.flags ~= nil and b.flags ~= nil and a.flags < b.flags
    353   end
    354   table.sort(entries, less)
    355 
    356   local function by_dirname(entry)
    357     return entry.dirname
    358   end
    359   foreach_group(entries, by_dirname, add_tabs)
    360 
    361 end
    362 
    363 
    364 -- Read a file list completely into memory.
    365 local function read_list(fname)
    366   local head = {}
    367   local entries = {}
    368   local errors = {}
    369 
    370   local f = assert(io.open(fname, "r"))
    371   local lineno = 0
    372   for line in f:lines() do
    373     lineno = lineno + 1
    374 
    375     local entry = parse_entry(line)
    376     if entry ~= nil then
    377       table.insert(entries, entry)
    378     elseif line:match("^#") then
    379       table.insert(head, line)
    380     else
    381       local msg = ("%s:%d: unknown line format %q"):format(fname, lineno, line)
    382       table.insert(errors, msg)
    383     end
    384   end
    385 
    386   f:close()
    387 
    388   return head, entries, errors
    389 end
    390 
    391 
    392 -- Write the normalized list file back to disk.
    393 --
    394 -- Duplicate lines are skipped.  This allows to append arbitrary lines to
    395 -- the end of the file and have them cleaned up automatically.
    396 local function write_list(fname, head, entries)
    397   local f = assert(io.open(fname, "w"))
    398 
    399   for _, line in ipairs(head) do
    400     f:write(line, "\n")
    401   end
    402 
    403   local prev_line = ""
    404   for _, entry in ipairs(entries) do
    405     local line = entry.prefix .. entry.fullname
    406     if entry.category ~= nil then
    407       line = line .. entry.category_tabs .. entry.category
    408     end
    409     if entry.flags ~= nil then
    410       line = line .. entry.flags_tabs .. entry.flags
    411     end
    412 
    413     if line ~= prev_line then
    414       prev_line = line
    415       f:write(line, "\n")
    416     else
    417       --print(("%s: duplicate entry: %s"):format(fname, line))
    418     end
    419   end
    420 
    421   f:close()
    422 end
    423 
    424 
    425 -- Load a file list, normalize it and write it back to disk.
    426 local function format_list(fname, write_back)
    427   local head, entries, errors = read_list(fname)
    428   if #errors > 0 then
    429     for _, err in ipairs(errors) do
    430       print(err)
    431     end
    432     return false
    433   end
    434 
    435   normalize(entries)
    436 
    437   if write_back then
    438     write_list(fname, head, entries)
    439   end
    440   return true
    441 end
    442 
    443 
    444 local function main(arg)
    445   local seen_error = false
    446   local write_back = true
    447   for _, fname in ipairs(arg) do
    448     if fname == "-n" then
    449       write_back = false
    450     else
    451       if not format_list(fname, write_back) then
    452         seen_error = true
    453       end
    454     end
    455   end
    456   return not seen_error
    457 end
    458 
    459 os.exit(main(arg))
    460