#!/bin/sh
# SPDX-License-Identifier: 0BSD

###############################################################################
#
# Look for missing license info in xz.git
#
# The project doesn't conform to the FSFE REUSE specification for now.
# Instead, this script helps in finding files that lack license info.
# Pass -v as an argument to get license info from all files in xz.git or,
# when .git isn't available, from files extracted from a release tarball
# (in case of a release tarball, the tree must be clean of any extra files).
#
# NOTE: This relies on non-POSIX xargs -0. It's supported on GNU and *BSDs.
#
###############################################################################
#
# Author: Lasse Collin
#
###############################################################################

# Print good files too if -v is passed as an argument.
# Any other argument is a usage error. The usage message is written to
# stderr so that it cannot be mistaken for the report printed on stdout.
VERBOSE=false
case $1 in
	'')
		;;
	-v)
		VERBOSE=true
		;;
	*)
		printf 'Usage: %s [-v]\n' "$0" >&2
		exit 1
		;;
esac


# Use the C locale so that sorting is always the same.
LC_ALL=C
export LC_ALL


# String to match the SPDX license identifier tag.
# Spell it here in a way that doesn't match regular grep patterns.
SPDX_LI='SPDX''-License-''Identifier'':'

# Pattern for files that don't contain SPDX tags but they are under
# a free license that isn't 0BSD. This is a newline-separated list of
# extended regular expressions for use with "grep -E".
PAT_UNTAGGED_MISC='^COPYING\.
^INSTALL\.generic$'

# Pattern for files that are 0BSD but don't contain SPDX tags.
# (The two file format specification files are public domain but
# they can be treated as 0BSD too.)
PAT_UNTAGGED_0BSD='^(.*/)?\.gitattributes$
^(.*/)?\.gitignore$
^\.github/SECURITY\.md$
^AUTHORS$
^COPYING$
^ChangeLog$
^INSTALL$
^NEWS$
^PACKAGERS$
^(.*/)?README$
^THANKS$
^TODO$
^(.*/)?[^/]+\.txt$
^doc/SHA256SUMS$
^po/LINGUAS$
^src/common/w32_application\.manifest$
^tests/xzgrep_expected_output$
^tests/files/[^/]+\.(lz|lzma|xz)$'

# Pattern for files that must be ignored when Git isn't available. This is
# useful when this script is run right after extracting a release tarball.
PAT_TARBALL_IGNORE='^(m4/)?[^/]*\.m4$
^(.*/)?Makefile\.in(\.in)?$
^(po|po4a)/.*[^.]..$
^ABOUT-NLS$
^build-aux/(config\..*|ltmain\.sh|[^.]*)$
^config\.h\.in$
^configure$'


# Go to the top source dir.
cd "$(dirname "$0")/.." || exit 1

# Get the list of files to check from git if possible.
# Otherwise list the whole source tree. This script should pass
# if it is run right after extracting a release tarball.
if test -d .git && type git > /dev/null 2>&1; then
	FILES=$(git ls-files) || exit 1
	IS_TARBALL=false
else
	FILES=$(find . -type f) || exit 1
	FILES=$(printf '%s\n' "$FILES" | sed 's,^\./,,')
	IS_TARBALL=true
fi

# Sort to keep the order consistent.
FILES=$(printf '%s\n' "$FILES" | sort)


# NOTE: printf '%s\n' "" still prints one (empty) line. If an empty list
# were piped to "xargs -0r", xargs would get one empty-string argument and
# run grep on a file named "". Similarly "sort | uniq -u" would add a blank
# entry to the result. Thus every step below is skipped when its input list
# is empty.

# Find the tagged files.
TAGGED=
if test -n "$FILES"; then
	TAGGED=$(printf '%s\n' "$FILES" \
			| tr '\n' '\000' | xargs -0r grep -l "$SPDX_LI" --)
fi

# Find the tagged 0BSD files.
TAGGED_0BSD=
if test -n "$TAGGED"; then
	TAGGED_0BSD=$(printf '%s\n' "$TAGGED" \
			| tr '\n' '\000' | xargs -0r grep -l "$SPDX_LI 0BSD" --)
fi

# Find the tagged non-0BSD files, that is, remove the 0BSD-tagged files
# from the list of tagged files. Each name in TAGGED_0BSD is also in TAGGED,
# so the names that occur only once ("uniq -u") are the non-0BSD ones.
TAGGED_MISC=$TAGGED
if test -n "$TAGGED_0BSD"; then
	TAGGED_MISC=$(printf '%s\n%s\n' "$TAGGED" "$TAGGED_0BSD" \
			| sort | uniq -u)
fi


# Remove the tagged files from the list.
if test -n "$TAGGED"; then
	FILES=$(printf '%s\n%s\n' "$FILES" "$TAGGED" | sort | uniq -u)
fi

# Find the intentionally-untagged files.
# Print the current value of FILES, one file name per line.
emit_files()
{
	printf '%s\n' "$FILES"
}

# Files that match the lists of known untagged files (0BSD and other
# free licenses).
UNTAGGED_0BSD=$(emit_files | grep -E "$PAT_UNTAGGED_0BSD")
UNTAGGED_MISC=$(emit_files | grep -E "$PAT_UNTAGGED_MISC")

# Keep only the files that matched neither of the two pattern lists.
FILES=$(emit_files \
		| grep -Ev -e "$PAT_UNTAGGED_0BSD" -e "$PAT_UNTAGGED_MISC")


# FIXME: Allow untagged translations if they have a public domain notice.
# These are old translations that haven't been updated after 2024-02-14.
# Eventually these should go away.
PD_PO=$(emit_files | grep '\.po$' | tr '\n' '\000' \
		| xargs -0r grep -Fl '# This file is put in the public domain.' --)

# Drop the public domain .po files from the list. Names that occur in both
# inputs appear twice after sorting and thus "uniq -u" removes them.
if [ -n "$PD_PO" ]; then
	FILES=$(printf '%s\n%s\n' "$FILES" "$PD_PO" | sort | uniq -u)
fi


# Release tarballs can contain generated files that don't have SPDX tags.
# Move those from FILES to GENERATED. Nothing is done here when the file
# list came from "git ls-files".
GENERATED=
if $IS_TARBALL; then
	GENERATED=$(emit_files | grep -E "$PAT_TARBALL_IGNORE")
	FILES=$(emit_files | grep -Ev "$PAT_TARBALL_IGNORE")
fi


# With -v, print every category of files that was recognized.
if $VERBOSE; then
	printf '# Tagged 0BSD files:\n%s\n\n' "$TAGGED_0BSD"
	printf '# Intentionally untagged 0BSD:\n%s\n\n' "$UNTAGGED_0BSD"

	# FIXME: Remove when no longer needed.
	[ -z "$PD_PO" ] \
		|| printf '# Old public domain translations:\n%s\n\n' "$PD_PO"

	printf '# Tagged non-0BSD files:\n%s\n\n' "$TAGGED_MISC"
	printf '# Intentionally untagged miscellaneous: \n%s\n\n' \
			"$UNTAGGED_MISC"

	[ -z "$GENERATED" ] \
		|| printf '# Generated files whose license was NOT checked:\n%s\n\n' \
			"$GENERATED"
fi


# Look for files with an unknown license and set the exit status accordingly.
# Whatever is still left in FILES didn't match any known category.
# An empty list means success.
if test -z "$FILES"; then
	exit 0
fi

printf '# ERROR: Licensing is unclear:\n%s\n' "$FILES"
exit 1