fmt-list revision 1.3
11.1Srillig#! /usr/bin/lua
21.3Srillig-- $NetBSD: fmt-list,v 1.3 2020/11/02 20:14:01 rillig Exp $
31.1Srillig
41.1Srillig--[[
51.1Srillig
61.1SrilligAlign the lines of a file list so that all lines from the same directory
71.1Srillighave the other fields at the same indentation.
81.1Srillig
91.1SrilligSort the lines and remove duplicate lines.
101.1Srillig
111.2Srilligusage: ./fmt-list [-n] */*/{mi,ad.*,md.*}
121.1Srillig
131.1Srillig]]
141.1Srillig
151.1Srilliglocal function test(func)
161.1Srillig  func()
171.1Srilligend
181.1Srillig
191.1Srilliglocal function assert_equals(got, expected)
201.1Srillig  if got ~= expected then
211.1Srillig    assert(false, string.format("got %q, expected %q", got, expected))
221.1Srillig  end
231.1Srilligend
241.1Srillig
251.1Srillig
261.1Srillig-- Calculate the width of the given string on the screen, assuming that
271.1Srillig-- the tab width is 8 and that the string starts at a tabstop.
281.1Srilliglocal function tabwidth(str)
291.1Srillig  local width = 0
301.1Srillig  for i = 1, #str do
311.1Srillig    if string.sub(str, i, i) == "\t" then
321.1Srillig      width = width // 8 * 8 + 8
331.1Srillig    else
341.1Srillig      width = width + 1
351.1Srillig    end
361.1Srillig  end
371.1Srillig  return width
381.1Srilligend
391.1Srillig
401.1Srilligtest(function()
411.1Srillig  assert_equals(tabwidth(""), 0)
421.1Srillig  assert_equals(tabwidth("1234"), 4)
431.1Srillig  assert_equals(tabwidth("\t"), 8)
441.1Srillig  assert_equals(tabwidth("1234567\t"), 8)
451.1Srillig  assert_equals(tabwidth("\t1234\t"), 16)
461.1Srillig  assert_equals(tabwidth("\t1234\t1"), 17)
471.1Srilligend)
481.1Srillig
491.1Srillig
501.1Srillig-- Calculate the tab characters that are necessary to set the width
511.1Srillig-- of the string to the desired width.
521.1Srilliglocal function tabs(str, width)
531.1Srillig  local strwidth = tabwidth(str)
541.1Srillig  local tabs = string.rep("\t", (width - strwidth + 7) // 8)
551.1Srillig  if tabs == "" then
561.1Srillig    error(string.format("%q\t%d\t%d", str, strwidth, width))
571.1Srillig  end
581.1Srillig  assert(tabs ~= "")
591.1Srillig  return tabs
601.1Srilligend
611.1Srillig
621.1Srilligtest(function()
631.1Srillig  assert_equals(tabs("", 8), "\t")
641.1Srillig  assert_equals(tabs("1234567", 8), "\t")
651.1Srillig  assert_equals(tabs("", 64), "\t\t\t\t\t\t\t\t")
661.1Srilligend)
671.1Srillig
681.1Srillig
691.1Srillig-- Group the items by a key and then execute the action on each of the
701.1Srillig-- groups.
711.1Srilliglocal function foreach_group(items, get_key, action)
721.1Srillig  local key
731.1Srillig  local group = {}
741.1Srillig  for _, item in ipairs(items) do
751.1Srillig    local item_key = assert(get_key(item))
761.1Srillig    if item_key ~= key then
771.1Srillig      if #group > 0 then action(group, key) end
781.1Srillig      key = item_key
791.1Srillig      group = {}
801.1Srillig    end
811.1Srillig    table.insert(group, item)
821.1Srillig  end
831.1Srillig  if #group > 0 then action(group, key) end
841.1Srilligend
851.1Srillig
861.1Srilligtest(function()
871.1Srillig  local items = {
881.1Srillig    {"prime", 2},
891.1Srillig    {"prime", 3},
901.1Srillig    {"not prime", 4},
911.1Srillig    {"prime", 5},
921.1Srillig    {"prime", 7}
931.1Srillig  }
941.1Srillig  local result = ""
951.1Srillig  foreach_group(
961.1Srillig    items,
971.1Srillig    function(item) return item[1] end,
981.1Srillig    function(group, key)
991.1Srillig      result = result .. string.format("%d %s\n", #group, key)
1001.1Srillig    end)
1011.1Srillig  assert_equals(result, "2 prime\n1 not prime\n2 prime\n")
1021.1Srilligend)
1031.1Srillig
1041.1Srillig
1051.1Srillig-- Parse a line from a file list and split it into its meaningful parts.
1061.1Srilliglocal function parse_entry(line)
1071.1Srillig
1081.1Srillig  local category_align, prefix, fullname, flags_align, category, flags =
1091.1Srillig    line:match("^((%-?)(%.%S*)%s+)((%S+)%s+)(%S+)$")
1101.1Srillig  if fullname == nil then
1111.1Srillig    category_align, prefix, fullname, category =
1121.1Srillig      line:match("^((%-?)(%.%S*)%s+)(%S+)$")
1131.1Srillig  end
1141.1Srillig  if fullname == nil then
1151.1Srillig    prefix, fullname = line:match("^(%-)(%.%S*)$")
1161.1Srillig  end
1171.1Srillig  if fullname == nil then
1181.1Srillig    return
1191.1Srillig  end
1201.1Srillig
1211.1Srillig  local dirname, basename = fullname:match("^(.+)/([^/]+)$")
1221.1Srillig  if dirname == nil then
1231.1Srillig    dirname, basename = "", fullname
1241.1Srillig  end
1251.1Srillig
1261.1Srillig  local category_col, flags_col
1271.1Srillig  if category_align ~= nil then
1281.1Srillig    category_col = tabwidth(category_align)
1291.1Srillig  end
1301.1Srillig  if flags_align ~= nil then
1311.1Srillig    flags_col = tabwidth(flags_align)
1321.1Srillig  end
1331.1Srillig
1341.1Srillig  return {
1351.1Srillig    prefix = prefix,
1361.1Srillig    fullname = fullname,
1371.1Srillig    dirname = dirname,
1381.1Srillig    basename = basename,
1391.1Srillig    category_col = category_col,
1401.1Srillig    category = category,
1411.1Srillig    flags_col = flags_col,
1421.1Srillig    flags = flags
1431.1Srillig  }
1441.1Srilligend
1451.1Srillig
1461.1Srilligtest(function()
1471.1Srillig  local entry = parse_entry("./dirname/filename\t\t\tcategory\tflags")
1481.1Srillig  assert_equals(entry.prefix, "")
1491.1Srillig  assert_equals(entry.fullname, "./dirname/filename")
1501.1Srillig  assert_equals(entry.dirname, "./dirname")
1511.1Srillig  assert_equals(entry.basename, "filename")
1521.1Srillig  assert_equals(entry.category_col, 40)
1531.1Srillig  assert_equals(entry.category, "category")
1541.1Srillig  assert_equals(entry.flags_col, 16)
1551.1Srillig  assert_equals(entry.flags, "flags")
1561.1Srilligend)
1571.1Srillig
1581.1Srillig
1591.1Srillig-- Return the smaller of the given values, ignoring nil.
1601.1Srilliglocal function min(curr, value)
1611.1Srillig  if curr == nil or (value ~= nil and value < curr) then
1621.1Srillig    return value
1631.1Srillig  end
1641.1Srillig  return curr
1651.1Srilligend
1661.1Srillig
1671.1Srilligtest(function()
1681.1Srillig  assert_equals(min(nil, nil), nil)
1691.1Srillig  assert_equals(min(0, nil), 0)
1701.1Srillig  assert_equals(min(nil, 0), 0)
1711.1Srillig  assert_equals(min(0, 0), 0)
1721.1Srillig  assert_equals(min(1, -1), -1)
1731.1Srillig  assert_equals(min(-1, 1), -1)
1741.1Srilligend)
1751.1Srillig
1761.1Srillig
1771.1Srillig-- Return the larger of the given values, ignoring nil.
1781.1Srilliglocal function max(curr, value)
1791.1Srillig  if curr == nil or (value ~= nil and value > curr) then
1801.1Srillig    return value
1811.1Srillig  end
1821.1Srillig  return curr
1831.1Srilligend
1841.1Srillig
1851.1Srilligtest(function()
1861.1Srillig  assert_equals(max(nil, nil), nil)
1871.1Srillig  assert_equals(max(0, nil), 0)
1881.1Srillig  assert_equals(max(nil, 0), 0)
1891.1Srillig  assert_equals(max(0, 0), 0)
1901.1Srillig  assert_equals(max(1, -1), 1)
1911.1Srillig  assert_equals(max(-1, 1), 1)
1921.1Srilligend)
1931.1Srillig
1941.1Srillig
1951.1Srillig-- Calculate the column on which the field should be aligned.
1961.1Srilliglocal function column(entries, get_width_before, colname)
1971.1Srillig
1981.1Srillig  local function nexttab(col)
1991.1Srillig    return col // 8 * 8 + 8
2001.1Srillig  end
2011.1Srillig
2021.1Srillig  local currmin, currmax, required
2031.1Srillig
2041.1Srillig  for _, entry in ipairs(entries) do
2051.1Srillig    local width = get_width_before(entry)
2061.1Srillig    if width ~= nil then
2071.1Srillig      required = max(required, width)
2081.1Srillig
2091.1Srillig      local col = entry[colname]
2101.1Srillig      currmin = min(currmin, col)
2111.1Srillig      currmax = max(currmax, col)
2121.1Srillig    end
2131.1Srillig  end
2141.1Srillig
2151.1Srillig  if currmin == currmax then
2161.1Srillig    return currmin, "aligned"
2171.1Srillig  end
2181.1Srillig  return nexttab(required), "unaligned"
2191.1Srilligend
2201.1Srillig
2211.1Srilligtest(function()
2221.1Srillig
2231.1Srillig  local function width_before_category(entry)
2241.1Srillig    return tabwidth(entry.prefix .. entry.fullname)
2251.1Srillig  end
2261.1Srillig
2271.1Srillig  local function width_before_flags(entry)
2281.1Srillig    return tabwidth(entry.category)
2291.1Srillig  end
2301.1Srillig
2311.1Srillig  -- The entries are nicely aligned, therefore there is no need to change
2321.1Srillig  -- anything.
2331.1Srillig  local entries = {
2341.1Srillig    parse_entry("./file1\tcategory"),
2351.1Srillig    parse_entry("./file2\tcategory")
2361.1Srillig  }
2371.1Srillig  assert_equals(entries[2].category_col, 8)
2381.1Srillig  assert_equals(width_before_category(entries[2]), 7)
2391.1Srillig  assert_equals(column(entries, width_before_category, "category_col"), 8)
2401.1Srillig
2411.1Srillig  -- The entries are currently not aligned, therefore they are aligned
2421.1Srillig  -- to the minimum required column.
2431.1Srillig  entries = {
2441.1Srillig    parse_entry("./file1\tcategory"),
2451.1Srillig    parse_entry("./directory/file2\tcategory"),
2461.1Srillig  }
2471.1Srillig  assert_equals(entries[2].category_col, 24)
2481.1Srillig  assert_equals(column(entries, width_before_category, "category_col"), 24)
2491.1Srillig
2501.1Srillig  -- The entries are already aligned, therefore the current alignment is
2511.1Srillig  -- preserved, even though it is more than the minimum required alignment
2521.1Srillig  -- of 8.  There are probably reasons for the large indentation.
2531.1Srillig  entries = {
2541.1Srillig    parse_entry("./file1\t\t\tcategory"),
2551.1Srillig    parse_entry("./file2\t\t\tcategory")
2561.1Srillig  }
2571.1Srillig  assert_equals(column(entries, width_before_category, "category_col"), 24)
2581.1Srillig
2591.1Srillig  -- The flags are already aligned, 4 tabs to the right of the category.
2601.1Srillig  -- There is no reason to change anything here.
2611.1Srillig  entries = {
2621.1Srillig    parse_entry("./file1\tcategory\t\t\tflags"),
2631.1Srillig    parse_entry("./file2\tcategory"),
2641.1Srillig    parse_entry("./file3\tcat\t\t\t\tflags")
2651.1Srillig  }
2661.1Srillig  assert_equals(column(entries, width_before_flags, "flags_col"), 32)
2671.1Srillig
2681.1Srilligend)
2691.1Srillig
2701.1Srillig
2711.1Srillig-- Amend the entries by the tabs used for alignment.
2721.1Srilliglocal function add_tabs(entries)
2731.1Srillig
2741.1Srillig  local function width_before_category(entry)
2751.1Srillig    return tabwidth(entry.prefix .. entry.fullname)
2761.1Srillig  end
2771.1Srillig  local function width_before_flags(entry)
2781.1Srillig    if entry.flags ~= nil then
2791.1Srillig      return tabwidth(entry.category)
2801.1Srillig    end
2811.1Srillig  end
2821.1Srillig
2831.1Srillig  local category_col, category_aligned =
2841.1Srillig    column(entries, width_before_category, "category_col")
2851.1Srillig  local flags_col = column(entries, width_before_flags, "flags_col")
2861.1Srillig
2871.1Srillig  -- To avoid horizontal jumps for the column, the minimum column is set
2881.1Srillig  -- to 56.  This way, the third column is usually set to 72, which is
2891.1Srillig  -- still visible on an 80-column screen.
2901.1Srillig  if category_aligned == "unaligned" then
2911.1Srillig    category_col = max(category_col, 56)
2921.1Srillig  end
2931.1Srillig
2941.1Srillig  for _, entry in ipairs(entries) do
2951.1Srillig    local prefix = entry.prefix
2961.1Srillig    local fullname = entry.fullname
2971.1Srillig    local category = entry.category
2981.1Srillig    local flags = entry.flags
2991.1Srillig
3001.1Srillig    if category ~= nil then
3011.1Srillig      entry.category_tabs = tabs(prefix .. fullname, category_col)
3021.1Srillig      if flags ~= nil then
3031.1Srillig        entry.flags_tabs = tabs(category, flags_col)
3041.1Srillig      end
3051.1Srillig    end
3061.1Srillig  end
3071.1Srilligend
3081.1Srillig
3091.1Srilligtest(function()
3101.1Srillig  local entries = {
3111.1Srillig    parse_entry("./file1\t\t\t\tcategory\t\tflags"),
3121.1Srillig    parse_entry("./file2\t\t\t\tcategory\t\tflags"),
3131.1Srillig    parse_entry("./file3\t\t\tcategory\t\tflags")
3141.1Srillig  }
3151.1Srillig  add_tabs(entries)
3161.1Srillig  assert_equals(entries[1].category_tabs, "\t\t\t\t\t\t\t")
3171.1Srillig  assert_equals(entries[2].category_tabs, "\t\t\t\t\t\t\t")
3181.1Srillig  assert_equals(entries[3].category_tabs, "\t\t\t\t\t\t\t")
3191.1Srillig  assert_equals(entries[1].flags_tabs, "\t\t")
3201.1Srillig  assert_equals(entries[2].flags_tabs, "\t\t")
3211.1Srillig  assert_equals(entries[3].flags_tabs, "\t\t")
3221.1Srilligend)
3231.1Srillig
3241.1Srillig
3251.1Srillig-- Normalize the alignment of the fields of the entries.
3261.1Srilliglocal function normalize(entries)
3271.1Srillig
3281.1Srillig  local function less(a, b)
3291.1Srillig    if a.fullname ~= b.fullname then
3301.1Srillig      return a.fullname < b.fullname
3311.1Srillig    end
3321.1Srillig    if a.category ~= nil and b.category ~= nil and a.category ~= b.category then
3331.1Srillig      return a.category < b.category
3341.1Srillig    end
3351.1Srillig    return a.flags ~= nil and b.flags ~= nil and a.flags < b.flags
3361.1Srillig  end
3371.1Srillig  table.sort(entries, less)
3381.1Srillig
3391.1Srillig  local function by_dirname(entry)
3401.1Srillig    return entry.dirname
3411.1Srillig  end
3421.1Srillig  foreach_group(entries, by_dirname, add_tabs)
3431.1Srillig
3441.1Srilligend
3451.1Srillig
3461.1Srillig
3471.1Srillig-- Read a file list completely into memory.
3481.1Srilliglocal function read_list(fname)
3491.1Srillig  local head = {}
3501.1Srillig  local entries = {}
3511.1Srillig  local errors = {}
3521.1Srillig
3531.1Srillig  local f = assert(io.open(fname, "r"))
3541.1Srillig  local lineno = 0
3551.1Srillig  for line in f:lines() do
3561.1Srillig    lineno = lineno + 1
3571.1Srillig
3581.1Srillig    local entry = parse_entry(line)
3591.1Srillig    if entry ~= nil then
3601.1Srillig      table.insert(entries, entry)
3611.1Srillig    elseif line:match("^#") then
3621.1Srillig      table.insert(head, line)
3631.1Srillig    else
3641.1Srillig      local msg = string.format(
3651.1Srillig        "%s:%d: unknown line format %q", fname, lineno, line)
3661.1Srillig      table.insert(errors, msg)
3671.1Srillig    end
3681.1Srillig  end
3691.1Srillig
3701.1Srillig  f:close()
3711.1Srillig
3721.1Srillig  return head, entries, errors
3731.1Srilligend
3741.1Srillig
3751.1Srillig
3761.1Srillig-- Write the normalized list file back to disk.
3771.1Srillig--
3781.1Srillig-- Duplicate lines are skipped.  This allows to append arbitrary lines to
3791.1Srillig-- the end of the file and have them cleaned up automatically.
3801.1Srilliglocal function write_list(fname, head, entries)
3811.1Srillig  local f = assert(io.open(fname, "w"))
3821.1Srillig
3831.1Srillig  for _, line in ipairs(head) do
3841.1Srillig    f:write(line, "\n")
3851.1Srillig  end
3861.1Srillig
3871.1Srillig  local prev_line = ""
3881.1Srillig  for _, entry in ipairs(entries) do
3891.1Srillig    local line = entry.prefix .. entry.fullname
3901.1Srillig    if entry.category ~= nil then
3911.1Srillig      line = line .. entry.category_tabs .. entry.category
3921.1Srillig    end
3931.1Srillig    if entry.flags ~= nil then
3941.1Srillig      line = line .. entry.flags_tabs .. entry.flags
3951.1Srillig    end
3961.1Srillig
3971.1Srillig    if line ~= prev_line then
3981.1Srillig      prev_line = line
3991.1Srillig      f:write(line, "\n")
4001.1Srillig    end
4011.1Srillig  end
4021.1Srillig
4031.1Srillig  f:close()
4041.1Srilligend
4051.1Srillig
4061.1Srillig
4071.1Srillig-- Load a file list, normalize it and write it back to disk.
4081.2Srilliglocal function format_list(fname, write_back)
4091.1Srillig  local head, entries, errors = read_list(fname)
4101.1Srillig  if #errors > 0 then
4111.1Srillig    for _, err in ipairs(errors) do
4121.1Srillig      print(err)
4131.1Srillig    end
4141.3Srillig    return false
4151.1Srillig  end
4161.1Srillig
4171.1Srillig  normalize(entries)
4181.2Srillig
4191.2Srillig  if write_back then
4201.2Srillig    write_list(fname, head, entries)
4211.2Srillig  end
4221.3Srillig  return true
4231.1Srilligend
4241.1Srillig
4251.1Srillig
4261.2Srilliglocal function main(arg)
4271.3Srillig  local seen_error = false
4281.2Srillig  local write_back = true
4291.2Srillig  for _, fname in ipairs(arg) do
4301.2Srillig    if fname == "-n" then
4311.2Srillig      write_back = false
4321.2Srillig    else
4331.3Srillig      if not format_list(fname, write_back) then
4341.3Srillig        seen_error = true
4351.3Srillig      end
4361.2Srillig    end
4371.2Srillig  end
4381.3Srillig  return not seen_error
4391.1Srilligend
4401.2Srillig
4411.3Srilligos.exit(main(arg))
442