#!/bin/sh
# SPDX-License-Identifier: 0BSD

###############################################################################
#
# Look for missing license info in xz.git
#
# The project doesn't conform to the FSFE REUSE specification for now.
# Instead, this script helps in finding files that lack license info.
# Pass -v as an argument to get license info from all files in xz.git or,
# when .git isn't available, from files extracted from a release tarball
# (in case of a release tarball, the tree must be clean of any extra files).
#
# NOTE: This relies on non-POSIX xargs -0. It's supported on GNU and *BSDs.
#
###############################################################################
#
# Author: Lasse Collin
#
###############################################################################

# Print good files too if -v is passed as an argument.
#
# VERBOSE is set to the string "true" or "false". Only the first
# argument is examined: no argument keeps the default, -v enables
# verbose output, and anything else is a usage error (exit status 1).
VERBOSE=false
case $1 in
	'')
		;;
	-v)
		VERBOSE=true
		;;
	*)
		# The usage message is a diagnostic, so it goes to stderr.
		# This keeps stdout reserved for the license report.
		printf 'Usage: %s [-v]\n' "$0" >&2
		exit 1
		;;
esac


# Use the C locale so that sorting is always the same.
# The variable is exported so that the external tools run by
# this script (sort, grep, ...) see it too.
LC_ALL=C
export LC_ALL


# String to match the SPDX license identifier tag.
# Spell it here in a way that doesn't match regular grep patterns:
# the quoting splits the literal into pieces in the source text while
# the shell still concatenates them into one contiguous string.
SPDX_LI='SPDX''-License-''Identifier'':'

# The PAT_* variables below each hold a list of extended regular
# expressions, one per line. They are passed to "grep -E" and matched
# against file names relative to the top source directory.

# Pattern for files that don't contain SPDX tags but they are under
# a free license that isn't 0BSD.
PAT_UNTAGGED_MISC='^COPYING\.
^INSTALL\.generic$'

# Pattern for files that are 0BSD but don't contain SPDX tags.
# (The two file format specification files are public domain but
# they can be treated as 0BSD too.)
PAT_UNTAGGED_0BSD='^(.*/)?\.gitattributes$
^(.*/)?\.gitignore$
^\.github/SECURITY\.md$
^AUTHORS$
^COPYING$
^ChangeLog$
^INSTALL$
^NEWS$
^PACKAGERS$
^(.*/)?README$
^THANKS$
^TODO$
^(.*/)?[^/]+\.txt$
^doc/SHA256SUMS$
^po/LINGUAS$
^src/common/w32_application\.manifest$
^tests/xzgrep_expected_output$
^tests/files/[^/]+\.(lz|lzma|xz)$'

# Pattern for files that must be ignored when Git isn't available. This is
# useful when this script is run right after extracting a release tarball.
PAT_TARBALL_IGNORE='^(m4/)?[^/]*\.m4$
^(.*/)?Makefile\.in(\.in)?$
^(po|po4a)/.*[^.]..$
^ABOUT-NLS$
^build-aux/(config\..*|ltmain\.sh|[^.]*)$
^config\.h\.in$
^configure$'


# Go to the top source dir (the parent of the directory of this script).
cd "$(dirname "$0")/.." || exit 1

# Get the list of files to check from git if possible.
# Otherwise list the whole source tree. This script should pass
# if it is run right after extracting a release tarball.
#
# .git is tested with -e instead of -d because in linked worktrees and
# submodule checkouts it is a regular file instead of a directory.
#
# IS_TARBALL records which method was used ("true" or "false").
if test -e .git && type git > /dev/null 2>&1; then
	FILES=$(git ls-files) || exit 1
	IS_TARBALL=false
else
	FILES=$(find . -type f) || exit 1

	# Strip the leading "./" so that the names look the same as
	# in the "git ls-files" output.
	FILES=$(printf '%s\n' "$FILES" | sed 's,^\./,,')
	IS_TARBALL=true
fi

# Sort to keep the order consistent.
FILES=$(printf '%s\n' "$FILES" | sort)


# Find the tagged files, that is, files that contain the SPDX tag string.
# The newline-separated list is converted to a NUL-separated one for
# xargs -0. With an empty list printf would still emit one empty line,
# so grep is skipped in that case.
TAGGED=
if test -n "$FILES"; then
	TAGGED=$(printf '%s\n' "$FILES" \
		| tr '\n' '\000' | xargs -0r grep -l "$SPDX_LI" --)
fi

TAGGED_0BSD=
TAGGED_MISC=
if test -n "$TAGGED"; then
	# Find the tagged 0BSD files.
	TAGGED_0BSD=$(printf '%s\n' "$TAGGED" \
		| tr '\n' '\000' | xargs -0r grep -l "$SPDX_LI 0BSD" --)

	# Find the tagged non-0BSD files, that is, remove the 0BSD-tagged
	# files from the list of tagged files. Names that are on both lists
	# appear twice and "uniq -u" drops them.
	#
	# An empty list must not be fed to this construct: it would add
	# an empty line which "uniq -u" would keep as a bogus file name.
	if test -n "$TAGGED_0BSD"; then
		TAGGED_MISC=$(printf '%s\n%s\n' "$TAGGED" "$TAGGED_0BSD" \
			| sort | uniq -u)
	else
		TAGGED_MISC=$TAGGED
	fi

	# Remove the tagged files from the list.
	FILES=$(printf '%s\n%s\n' "$FILES" "$TAGGED" | sort | uniq -u)
fi

# Find the intentionally-untagged files. Each remaining file name is
# matched against the two lists of known-good untagged files.
UNTAGGED_0BSD=$(printf '%s\n' "$FILES" | grep -E "$PAT_UNTAGGED_0BSD")
UNTAGGED_MISC=$(printf '%s\n' "$FILES" | grep -E "$PAT_UNTAGGED_MISC")

# Remove the intentionally-untagged files from the list, that is,
# keep only the names that match neither pattern list.
FILES=$(printf '%s\n' "$FILES" | grep -Ev \
	-e "$PAT_UNTAGGED_0BSD" -e "$PAT_UNTAGGED_MISC")


# FIXME: Allow untagged translations if they have a public domain notice.
# These are old translations that haven't been updated after 2024-02-14.
# Eventually these should go away.
#
# Only names ending in ".po" are checked. grep -F looks for the notice
# as a fixed string and -l prints the names of the files containing it.
PD_PO=$(printf '%s\n' "$FILES" | grep '\.po$' | tr '\n' '\000' \
	| xargs -0r grep -Fl '# This file is put in the public domain.' --)

if test -n "$PD_PO"; then
	# Remove the public domain .po files from the list. This is
	# skipped when PD_PO is empty because the empty line from printf
	# would survive "uniq -u" as a bogus file name.
	FILES=$(printf '%s\n%s\n' "$FILES" "$PD_PO" | sort | uniq -u)
fi


# Remove generated files from the list which don't have SPDX tags but which
# can be present in release tarballs. This step is skipped when the file list
# is from "git ls-files".
#
# IS_TARBALL is compared as a string instead of being run as a command.
GENERATED=
if test "$IS_TARBALL" = true; then
	GENERATED=$(printf '%s\n' "$FILES" | grep -E "$PAT_TARBALL_IGNORE")
	FILES=$(printf '%s\n' "$FILES" | grep -Ev "$PAT_TARBALL_IGNORE")
fi


# In verbose mode, print every category of files whose license status
# is known. Each list is preceded by a "#" heading and followed by an
# empty line. VERBOSE is compared as a string instead of being run as
# a command.
if test "$VERBOSE" = true; then
	printf '# Tagged 0BSD files:\n%s\n\n' "$TAGGED_0BSD"
	printf '# Intentionally untagged 0BSD:\n%s\n\n' "$UNTAGGED_0BSD"

	# FIXME: Remove when no longer needed.
	if test -n "$PD_PO"; then
		printf '# Old public domain translations:\n%s\n\n' "$PD_PO"
	fi

	printf '# Tagged non-0BSD files:\n%s\n\n' "$TAGGED_MISC"
	printf '# Intentionally untagged miscellaneous: \n%s\n\n' \
		"$UNTAGGED_MISC"

	# This section is printed only when there is something to list.
	if test -n "$GENERATED"; then
		printf '# Generated files whose license was NOT checked:\n%s\n\n' \
			"$GENERATED"
	fi
fi


# Look for files with an unknown license and set the exit status accordingly.
# Whatever is still left in FILES was not recognized by any of the earlier
# steps: exit with status 1 after listing those files, or with status 0
# if the list is empty.
STATUS=0
if test -n "$FILES"; then
	printf '# ERROR: Licensing is unclear:\n%s\n' "$FILES"
	STATUS=1
fi

exit "$STATUS"