1a5ae21e4Smrg#!/usr/bin/env perl 204b94745Smrg# $XTermId: utf8.pl,v 1.12 2022/07/08 18:32:43 tom Exp $ 3a5ae21e4Smrg# ----------------------------------------------------------------------------- 4a5ae21e4Smrg# this file is part of xterm 5a5ae21e4Smrg# 65307cd1aSmrg# Copyright 2012-2018,2022 by Thomas E. Dickey 75307cd1aSmrg# 8a5ae21e4Smrg# All Rights Reserved 95307cd1aSmrg# 10a5ae21e4Smrg# Permission is hereby granted, free of charge, to any person obtaining a 11a5ae21e4Smrg# copy of this software and associated documentation files (the 12a5ae21e4Smrg# "Software"), to deal in the Software without restriction, including 13a5ae21e4Smrg# without limitation the rights to use, copy, modify, merge, publish, 14a5ae21e4Smrg# distribute, sublicense, and/or sell copies of the Software, and to 15a5ae21e4Smrg# permit persons to whom the Software is furnished to do so, subject to 16a5ae21e4Smrg# the following conditions: 175307cd1aSmrg# 18a5ae21e4Smrg# The above copyright notice and this permission notice shall be included 19a5ae21e4Smrg# in all copies or substantial portions of the Software. 205307cd1aSmrg# 21a5ae21e4Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 22a5ae21e4Smrg# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 23a5ae21e4Smrg# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 24a5ae21e4Smrg# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 25a5ae21e4Smrg# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 26a5ae21e4Smrg# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 27a5ae21e4Smrg# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 285307cd1aSmrg# 29a5ae21e4Smrg# Except as contained in this notice, the name(s) of the above copyright 30a5ae21e4Smrg# holders shall not be used in advertising or otherwise to promote the 31a5ae21e4Smrg# sale, use or other dealings in this Software without prior written 32a5ae21e4Smrg# authorization. 33a5ae21e4Smrg# ----------------------------------------------------------------------------- 34a5ae21e4Smrg# display the given Unicode characters, given their hex or decimal values. 35a5ae21e4Smrg 365307cd1aSmrguse warnings FATAL => "overflow"; 375307cd1aSmrgno warnings "portable"; 38a5ae21e4Smrguse strict; 39a5ae21e4Smrguse Encode 'encode_utf8'; 405307cd1aSmrguse Text::CharWidth qw(mbswidth); 415307cd1aSmrg 425307cd1aSmrg$| = 1; 435307cd1aSmrg 445307cd1aSmrgsub num_bytes($) { 455307cd1aSmrg my $char = shift; 465307cd1aSmrg my $value = length( Encode::encode_utf8($char) ); 475307cd1aSmrg my $result = 485307cd1aSmrg ( $value <= 0 495307cd1aSmrg ? "no bytes" 505307cd1aSmrg : ( $value > 1 ? sprintf( "%d bytes", $value ) : "1 bytes" ) ); 515307cd1aSmrg return $result; 525307cd1aSmrg} 535307cd1aSmrg 545307cd1aSmrgsub num_cells($) { 555307cd1aSmrg my $char = shift; 565307cd1aSmrg my $value = mbswidth($char); 575307cd1aSmrg my $result = 585307cd1aSmrg ( $value <= 0 595307cd1aSmrg ? "no cells" 605307cd1aSmrg : ( $value > 1 ? sprintf( "%d cells", $value ) : "1 cell" ) ); 615307cd1aSmrg return $result; 625307cd1aSmrg} 635307cd1aSmrg 645307cd1aSmrgsub pad_column($) { 655307cd1aSmrg my $char = shift; 665307cd1aSmrg my $value = mbswidth($char); 675307cd1aSmrg $value = 0 if ( $value < 0); 685307cd1aSmrg my $result = sprintf( "%.*s", 3 - $value, " "); 695307cd1aSmrg return $result; 705307cd1aSmrg} 71a5ae21e4Smrg 72a5ae21e4Smrgsub vxt_utf8($) { 735307cd1aSmrg my $arg = $_[0]; 745307cd1aSmrg my $hex = $arg; 755307cd1aSmrg my $dec = $arg; 765307cd1aSmrg if ( $arg =~ /^u\+[[:xdigit:]]+$/i ) { 775307cd1aSmrg $hex =~ s/^../0x/; 785307cd1aSmrg $dec = hex($hex); 795307cd1aSmrg } 805307cd1aSmrg elsif ( $arg =~ /^0x[[:xdigit:]]+$/i ) { 815307cd1aSmrg $dec = hex($hex); 825307cd1aSmrg } 835307cd1aSmrg elsif ( $arg =~ /^[[:xdigit:]]+$/i ) { 845307cd1aSmrg $dec = hex($hex); 855307cd1aSmrg } 865307cd1aSmrg else { 875307cd1aSmrg printf STDERR "? not a codepoint: $dec\n"; 885307cd1aSmrg return; 895307cd1aSmrg } 905307cd1aSmrg my $chr = chr($dec); 915307cd1aSmrg my $type = ( 925307cd1aSmrg $chr =~ /\p{isPrint}/ 935307cd1aSmrg ? ( 945307cd1aSmrg $chr =~ /\p{isAlpha}/ 955307cd1aSmrg ? "alpha" 965307cd1aSmrg : ( 975307cd1aSmrg $chr =~ /\p{isPunct}/ 985307cd1aSmrg ? "punct" 995307cd1aSmrg : ( 1005307cd1aSmrg $chr =~ /\p{isDigit}/ 1015307cd1aSmrg ? "digit" 1025307cd1aSmrg : "printing" 1035307cd1aSmrg ) 1045307cd1aSmrg ) 1055307cd1aSmrg ) 1065307cd1aSmrg : ( 1075307cd1aSmrg $chr =~ /\p{isCntrl}/ 1085307cd1aSmrg ? "cntrl" 1095307cd1aSmrg : "nonprinting" 1105307cd1aSmrg ) 1115307cd1aSmrg ); 1125307cd1aSmrg printf "%d ->%#x ->{%s}%s(%s %s %s)\n", $dec, $dec, $chr, 1135307cd1aSmrg &pad_column($chr), 1145307cd1aSmrg &num_bytes($chr), 1155307cd1aSmrg &num_cells($chr), 1165307cd1aSmrg $type; 117a5ae21e4Smrg} 118a5ae21e4Smrg 1195307cd1aSmrgbinmode( STDOUT, ":utf8" ); 120a5ae21e4Smrgwhile ( $#ARGV >= 0 ) { 1215307cd1aSmrg vxt_utf8( shift @ARGV ); 122a5ae21e4Smrg} 1235307cd1aSmrg 1245307cd1aSmrg1; 125