utf8.pl revision 04b94745
1#!/usr/bin/env perl 2# $XTermId: utf8.pl,v 1.12 2022/07/08 18:32:43 tom Exp $ 3# ----------------------------------------------------------------------------- 4# this file is part of xterm 5# 6# Copyright 2012-2018,2022 by Thomas E. Dickey 7# 8# All Rights Reserved 9# 10# Permission is hereby granted, free of charge, to any person obtaining a 11# copy of this software and associated documentation files (the 12# "Software"), to deal in the Software without restriction, including 13# without limitation the rights to use, copy, modify, merge, publish, 14# distribute, sublicense, and/or sell copies of the Software, and to 15# permit persons to whom the Software is furnished to do so, subject to 16# the following conditions: 17# 18# The above copyright notice and this permission notice shall be included 19# in all copies or substantial portions of the Software. 20# 21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 22# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 23# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 24# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 25# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 26# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 27# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28# 29# Except as contained in this notice, the name(s) of the above copyright 30# holders shall not be used in advertising or otherwise to promote the 31# sale, use or other dealings in this Software without prior written 32# authorization. 33# ----------------------------------------------------------------------------- 34# display the given Unicode characters, given their hex or decimal values. 35 36use warnings FATAL => "overflow"; 37no warnings "portable"; 38use strict; 39use Encode 'encode_utf8'; 40use Text::CharWidth qw(mbswidth); 41 42$| = 1; 43 44sub num_bytes($) { 45 my $char = shift; 46 my $value = length( Encode::encode_utf8($char) ); 47 my $result = 48 ( $value <= 0 49 ? "no bytes" 50 : ( $value > 1 ? sprintf( "%d bytes", $value ) : "1 bytes" ) ); 51 return $result; 52} 53 54sub num_cells($) { 55 my $char = shift; 56 my $value = mbswidth($char); 57 my $result = 58 ( $value <= 0 59 ? "no cells" 60 : ( $value > 1 ? sprintf( "%d cells", $value ) : "1 cell" ) ); 61 return $result; 62} 63 64sub pad_column($) { 65 my $char = shift; 66 my $value = mbswidth($char); 67 $value = 0 if ( $value < 0); 68 my $result = sprintf( "%.*s", 3 - $value, " "); 69 return $result; 70} 71 72sub vxt_utf8($) { 73 my $arg = $_[0]; 74 my $hex = $arg; 75 my $dec = $arg; 76 if ( $arg =~ /^u\+[[:xdigit:]]+$/i ) { 77 $hex =~ s/^../0x/; 78 $dec = hex($hex); 79 } 80 elsif ( $arg =~ /^0x[[:xdigit:]]+$/i ) { 81 $dec = hex($hex); 82 } 83 elsif ( $arg =~ /^[[:xdigit:]]+$/i ) { 84 $dec = hex($hex); 85 } 86 else { 87 printf STDERR "? not a codepoint: $dec\n"; 88 return; 89 } 90 my $chr = chr($dec); 91 my $type = ( 92 $chr =~ /\p{isPrint}/ 93 ? ( 94 $chr =~ /\p{isAlpha}/ 95 ? "alpha" 96 : ( 97 $chr =~ /\p{isPunct}/ 98 ? "punct" 99 : ( 100 $chr =~ /\p{isDigit}/ 101 ? "digit" 102 : "printing" 103 ) 104 ) 105 ) 106 : ( 107 $chr =~ /\p{isCntrl}/ 108 ? "cntrl" 109 : "nonprinting" 110 ) 111 ); 112 printf "%d ->%#x ->{%s}%s(%s %s %s)\n", $dec, $dec, $chr, 113 &pad_column($chr), 114 &num_bytes($chr), 115 &num_cells($chr), 116 $type; 117} 118 119binmode( STDOUT, ":utf8" ); 120while ( $#ARGV >= 0 ) { 121 vxt_utf8( shift @ARGV ); 122} 123 1241; 125