utf8.pl revision a5ae21e4
1a5ae21e4Smrg#!/usr/bin/env perl
2a5ae21e4Smrg# $XTermId: utf8.pl,v 1.5 2018/12/14 09:25:47 tom Exp $
3a5ae21e4Smrg# -----------------------------------------------------------------------------
4a5ae21e4Smrg# this file is part of xterm
5a5ae21e4Smrg#
6a5ae21e4Smrg# Copyright 2012,2018 by Thomas E. Dickey
7a5ae21e4Smrg#
8a5ae21e4Smrg#                         All Rights Reserved
9a5ae21e4Smrg#
10a5ae21e4Smrg# Permission is hereby granted, free of charge, to any person obtaining a
11a5ae21e4Smrg# copy of this software and associated documentation files (the
12a5ae21e4Smrg# "Software"), to deal in the Software without restriction, including
13a5ae21e4Smrg# without limitation the rights to use, copy, modify, merge, publish,
14a5ae21e4Smrg# distribute, sublicense, and/or sell copies of the Software, and to
15a5ae21e4Smrg# permit persons to whom the Software is furnished to do so, subject to
16a5ae21e4Smrg# the following conditions:
17a5ae21e4Smrg#
18a5ae21e4Smrg# The above copyright notice and this permission notice shall be included
19a5ae21e4Smrg# in all copies or substantial portions of the Software.
20a5ae21e4Smrg#
21a5ae21e4Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22a5ae21e4Smrg# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23a5ae21e4Smrg# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24a5ae21e4Smrg# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
25a5ae21e4Smrg# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26a5ae21e4Smrg# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27a5ae21e4Smrg# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28a5ae21e4Smrg#
29a5ae21e4Smrg# Except as contained in this notice, the name(s) of the above copyright
30a5ae21e4Smrg# holders shall not be used in advertising or otherwise to promote the
31a5ae21e4Smrg# sale, use or other dealings in this Software without prior written
32a5ae21e4Smrg# authorization.
33a5ae21e4Smrg# -----------------------------------------------------------------------------
34a5ae21e4Smrg# display the given Unicode characters, given their hex or decimal values.
35a5ae21e4Smrg
36a5ae21e4Smrguse strict;
37a5ae21e4Smrguse Encode 'encode_utf8';
38a5ae21e4Smrg
39a5ae21e4Smrgsub vxt_utf8($) {
40a5ae21e4Smrg	my $arg = $_[0];
41a5ae21e4Smrg	my $hex = $arg;
42a5ae21e4Smrg	my $dec = $arg;
43a5ae21e4Smrg	if ( $arg =~ /^u\+[[:xdigit:]]+/i ) {
44a5ae21e4Smrg		$hex =~ s/^../0x/;
45a5ae21e4Smrg		$dec = hex($hex);
46a5ae21e4Smrg	} elsif ( $arg !~ /^0x[[:xdigit:]]+$/i ) {
47a5ae21e4Smrg		$hex = sprintf "%04X", $arg;
48a5ae21e4Smrg	} elsif ( $arg !~ /^u\+[[:xdigit:]]+$/i ) {
49a5ae21e4Smrg		$hex =~ s/^u\+//i;
50a5ae21e4Smrg		$hex = sprintf "%04X", $arg;
51a5ae21e4Smrg	} else {
52a5ae21e4Smrg		$dec = hex($hex);
53a5ae21e4Smrg	}
54a5ae21e4Smrg	my $chr = chr($dec);
55a5ae21e4Smrg	my $type = ( $chr =~ /\p{isPrint}/
56a5ae21e4Smrg		     ? ( $chr =~ /\p{isAlpha}/
57a5ae21e4Smrg			 ? "alpha"
58a5ae21e4Smrg			 : ( $chr =~ /\p{isPunct}/
59a5ae21e4Smrg			     ? "punct"
60a5ae21e4Smrg			     : ( $chr =~ /\p{isDigit}/
61a5ae21e4Smrg			         ? "digit"
62a5ae21e4Smrg				 : "printing" ) ) )
63a5ae21e4Smrg		     : ( $chr =~ /\p{isCntrl}/
64a5ae21e4Smrg		         ? "cntrl"
65a5ae21e4Smrg			 : "nonprinting" ) );
66a5ae21e4Smrg	printf "%d ->%#x ->{%s} (%d bytes, %s)\n", $dec, $dec, $chr, length(Encode::encode_utf8($dec)), $type;
67a5ae21e4Smrg}
68a5ae21e4Smrg
69a5ae21e4Smrgbinmode(STDOUT, ":utf8");
70a5ae21e4Smrgwhile ( $#ARGV >= 0 ) {
71a5ae21e4Smrg	vxt_utf8 ( shift @ARGV );
72a5ae21e4Smrg}
73a5ae21e4Smrgexit;
74