utf8.pl revision 04b94745
1#!/usr/bin/env perl
2# $XTermId: utf8.pl,v 1.12 2022/07/08 18:32:43 tom Exp $
3# -----------------------------------------------------------------------------
4# this file is part of xterm
5#
6# Copyright 2012-2018,2022 by Thomas E. Dickey
7#
8#                         All Rights Reserved
9#
10# Permission is hereby granted, free of charge, to any person obtaining a
11# copy of this software and associated documentation files (the
12# "Software"), to deal in the Software without restriction, including
13# without limitation the rights to use, copy, modify, merge, publish,
14# distribute, sublicense, and/or sell copies of the Software, and to
15# permit persons to whom the Software is furnished to do so, subject to
16# the following conditions:
17#
18# The above copyright notice and this permission notice shall be included
19# in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
25# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28#
29# Except as contained in this notice, the name(s) of the above copyright
30# holders shall not be used in advertising or otherwise to promote the
31# sale, use or other dealings in this Software without prior written
32# authorization.
33# -----------------------------------------------------------------------------
34# display the given Unicode characters, given their hex or decimal values.
35
36use warnings FATAL => "overflow";
37no warnings "portable";
38use strict;
39use Encode 'encode_utf8';
40use Text::CharWidth qw(mbswidth);
41
42$| = 1;
43
44sub num_bytes($) {
45    my $char  = shift;
46    my $value = length( Encode::encode_utf8($char) );
47    my $result =
48      ( $value <= 0
49        ? "no bytes"
50        : ( $value > 1 ? sprintf( "%d bytes", $value ) : "1 bytes" ) );
51    return $result;
52}
53
54sub num_cells($) {
55    my $char  = shift;
56    my $value = mbswidth($char);
57    my $result =
58      ( $value <= 0
59        ? "no cells"
60        : ( $value > 1 ? sprintf( "%d cells", $value ) : "1 cell" ) );
61    return $result;
62}
63
64sub pad_column($) {
65    my $char  = shift;
66    my $value = mbswidth($char);
67    $value = 0 if ( $value < 0);
68    my $result = sprintf( "%.*s", 3 - $value, "    ");
69    return $result;
70}
71
72sub vxt_utf8($) {
73    my $arg = $_[0];
74    my $hex = $arg;
75    my $dec = $arg;
76    if ( $arg =~ /^u\+[[:xdigit:]]+$/i ) {
77        $hex =~ s/^../0x/;
78        $dec = hex($hex);
79    }
80    elsif ( $arg =~ /^0x[[:xdigit:]]+$/i ) {
81        $dec = hex($hex);
82    }
83    elsif ( $arg =~ /^[[:xdigit:]]+$/i ) {
84        $dec = hex($hex);
85    }
86    else {
87        printf STDERR "? not a codepoint: $dec\n";
88        return;
89    }
90    my $chr  = chr($dec);
91    my $type = (
92        $chr =~ /\p{isPrint}/
93        ? (
94            $chr =~ /\p{isAlpha}/
95            ? "alpha"
96            : (
97                $chr =~ /\p{isPunct}/
98                ? "punct"
99                : (
100                    $chr =~ /\p{isDigit}/
101                    ? "digit"
102                    : "printing"
103                )
104            )
105          )
106        : (
107            $chr =~ /\p{isCntrl}/
108            ? "cntrl"
109            : "nonprinting"
110        )
111    );
112    printf "%d ->%#x ->{%s}%s(%s %s %s)\n", $dec, $dec, $chr,
113    &pad_column($chr),
114      &num_bytes($chr),
115      &num_cells($chr),
116      $type;
117}
118
119binmode( STDOUT, ":utf8" );
120while ( $#ARGV >= 0 ) {
121    vxt_utf8( shift @ARGV );
122}
123
1241;
125