utf8.pl revision a5ae21e4
1a5ae21e4Smrg#!/usr/bin/env perl 2a5ae21e4Smrg# $XTermId: utf8.pl,v 1.5 2018/12/14 09:25:47 tom Exp $ 3a5ae21e4Smrg# ----------------------------------------------------------------------------- 4a5ae21e4Smrg# this file is part of xterm 5a5ae21e4Smrg# 6a5ae21e4Smrg# Copyright 2012,2018 by Thomas E. Dickey 7a5ae21e4Smrg# 8a5ae21e4Smrg# All Rights Reserved 9a5ae21e4Smrg# 10a5ae21e4Smrg# Permission is hereby granted, free of charge, to any person obtaining a 11a5ae21e4Smrg# copy of this software and associated documentation files (the 12a5ae21e4Smrg# "Software"), to deal in the Software without restriction, including 13a5ae21e4Smrg# without limitation the rights to use, copy, modify, merge, publish, 14a5ae21e4Smrg# distribute, sublicense, and/or sell copies of the Software, and to 15a5ae21e4Smrg# permit persons to whom the Software is furnished to do so, subject to 16a5ae21e4Smrg# the following conditions: 17a5ae21e4Smrg# 18a5ae21e4Smrg# The above copyright notice and this permission notice shall be included 19a5ae21e4Smrg# in all copies or substantial portions of the Software. 20a5ae21e4Smrg# 21a5ae21e4Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 22a5ae21e4Smrg# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 23a5ae21e4Smrg# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 24a5ae21e4Smrg# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY 25a5ae21e4Smrg# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 26a5ae21e4Smrg# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 27a5ae21e4Smrg# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 28a5ae21e4Smrg# 29a5ae21e4Smrg# Except as contained in this notice, the name(s) of the above copyright 30a5ae21e4Smrg# holders shall not be used in advertising or otherwise to promote the 31a5ae21e4Smrg# sale, use or other dealings in this Software without prior written 32a5ae21e4Smrg# authorization. 33a5ae21e4Smrg# ----------------------------------------------------------------------------- 34a5ae21e4Smrg# display the given Unicode characters, given their hex or decimal values. 35a5ae21e4Smrg 36a5ae21e4Smrguse strict; 37a5ae21e4Smrguse Encode 'encode_utf8'; 38a5ae21e4Smrg 39a5ae21e4Smrgsub vxt_utf8($) { 40a5ae21e4Smrg my $arg = $_[0]; 41a5ae21e4Smrg my $hex = $arg; 42a5ae21e4Smrg my $dec = $arg; 43a5ae21e4Smrg if ( $arg =~ /^u\+[[:xdigit:]]+/i ) { 44a5ae21e4Smrg $hex =~ s/^../0x/; 45a5ae21e4Smrg $dec = hex($hex); 46a5ae21e4Smrg } elsif ( $arg !~ /^0x[[:xdigit:]]+$/i ) { 47a5ae21e4Smrg $hex = sprintf "%04X", $arg; 48a5ae21e4Smrg } elsif ( $arg !~ /^u\+[[:xdigit:]]+$/i ) { 49a5ae21e4Smrg $hex =~ s/^u\+//i; 50a5ae21e4Smrg $hex = sprintf "%04X", $arg; 51a5ae21e4Smrg } else { 52a5ae21e4Smrg $dec = hex($hex); 53a5ae21e4Smrg } 54a5ae21e4Smrg my $chr = chr($dec); 55a5ae21e4Smrg my $type = ( $chr =~ /\p{isPrint}/ 56a5ae21e4Smrg ? ( $chr =~ /\p{isAlpha}/ 57a5ae21e4Smrg ? "alpha" 58a5ae21e4Smrg : ( $chr =~ /\p{isPunct}/ 59a5ae21e4Smrg ? "punct" 60a5ae21e4Smrg : ( $chr =~ /\p{isDigit}/ 61a5ae21e4Smrg ? "digit" 62a5ae21e4Smrg : "printing" ) ) ) 63a5ae21e4Smrg : ( $chr =~ /\p{isCntrl}/ 64a5ae21e4Smrg ? "cntrl" 65a5ae21e4Smrg : "nonprinting" ) ); 66a5ae21e4Smrg printf "%d ->%#x ->{%s} (%d bytes, %s)\n", $dec, $dec, $chr, length(Encode::encode_utf8($dec)), $type; 67a5ae21e4Smrg} 68a5ae21e4Smrg 69a5ae21e4Smrgbinmode(STDOUT, ":utf8"); 70a5ae21e4Smrgwhile ( $#ARGV >= 0 ) { 71a5ae21e4Smrg vxt_utf8 ( shift @ARGV ); 72a5ae21e4Smrg} 73a5ae21e4Smrgexit; 74