1a5ae21e4Smrg#!/usr/bin/env perl
204b94745Smrg# $XTermId: utf8.pl,v 1.12 2022/07/08 18:32:43 tom Exp $
3a5ae21e4Smrg# -----------------------------------------------------------------------------
4a5ae21e4Smrg# this file is part of xterm
5a5ae21e4Smrg#
65307cd1aSmrg# Copyright 2012-2018,2022 by Thomas E. Dickey
75307cd1aSmrg#
8a5ae21e4Smrg#                         All Rights Reserved
95307cd1aSmrg#
10a5ae21e4Smrg# Permission is hereby granted, free of charge, to any person obtaining a
11a5ae21e4Smrg# copy of this software and associated documentation files (the
12a5ae21e4Smrg# "Software"), to deal in the Software without restriction, including
13a5ae21e4Smrg# without limitation the rights to use, copy, modify, merge, publish,
14a5ae21e4Smrg# distribute, sublicense, and/or sell copies of the Software, and to
15a5ae21e4Smrg# permit persons to whom the Software is furnished to do so, subject to
16a5ae21e4Smrg# the following conditions:
175307cd1aSmrg#
18a5ae21e4Smrg# The above copyright notice and this permission notice shall be included
19a5ae21e4Smrg# in all copies or substantial portions of the Software.
205307cd1aSmrg#
21a5ae21e4Smrg# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22a5ae21e4Smrg# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23a5ae21e4Smrg# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24a5ae21e4Smrg# IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
25a5ae21e4Smrg# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
26a5ae21e4Smrg# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
27a5ae21e4Smrg# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
285307cd1aSmrg#
29a5ae21e4Smrg# Except as contained in this notice, the name(s) of the above copyright
30a5ae21e4Smrg# holders shall not be used in advertising or otherwise to promote the
31a5ae21e4Smrg# sale, use or other dealings in this Software without prior written
32a5ae21e4Smrg# authorization.
33a5ae21e4Smrg# -----------------------------------------------------------------------------
34a5ae21e4Smrg# display the given Unicode characters, given their hex or decimal values.
35a5ae21e4Smrg
365307cd1aSmrguse warnings FATAL => "overflow";
375307cd1aSmrgno warnings "portable";
38a5ae21e4Smrguse strict;
39a5ae21e4Smrguse Encode 'encode_utf8';
405307cd1aSmrguse Text::CharWidth qw(mbswidth);
415307cd1aSmrg
425307cd1aSmrg$| = 1;
435307cd1aSmrg
445307cd1aSmrgsub num_bytes($) {
455307cd1aSmrg    my $char  = shift;
465307cd1aSmrg    my $value = length( Encode::encode_utf8($char) );
475307cd1aSmrg    my $result =
485307cd1aSmrg      ( $value <= 0
495307cd1aSmrg        ? "no bytes"
505307cd1aSmrg        : ( $value > 1 ? sprintf( "%d bytes", $value ) : "1 bytes" ) );
515307cd1aSmrg    return $result;
525307cd1aSmrg}
535307cd1aSmrg
545307cd1aSmrgsub num_cells($) {
555307cd1aSmrg    my $char  = shift;
565307cd1aSmrg    my $value = mbswidth($char);
575307cd1aSmrg    my $result =
585307cd1aSmrg      ( $value <= 0
595307cd1aSmrg        ? "no cells"
605307cd1aSmrg        : ( $value > 1 ? sprintf( "%d cells", $value ) : "1 cell" ) );
615307cd1aSmrg    return $result;
625307cd1aSmrg}
635307cd1aSmrg
645307cd1aSmrgsub pad_column($) {
655307cd1aSmrg    my $char  = shift;
665307cd1aSmrg    my $value = mbswidth($char);
675307cd1aSmrg    $value = 0 if ( $value < 0);
685307cd1aSmrg    my $result = sprintf( "%.*s", 3 - $value, "    ");
695307cd1aSmrg    return $result;
705307cd1aSmrg}
71a5ae21e4Smrg
72a5ae21e4Smrgsub vxt_utf8($) {
735307cd1aSmrg    my $arg = $_[0];
745307cd1aSmrg    my $hex = $arg;
755307cd1aSmrg    my $dec = $arg;
765307cd1aSmrg    if ( $arg =~ /^u\+[[:xdigit:]]+$/i ) {
775307cd1aSmrg        $hex =~ s/^../0x/;
785307cd1aSmrg        $dec = hex($hex);
795307cd1aSmrg    }
805307cd1aSmrg    elsif ( $arg =~ /^0x[[:xdigit:]]+$/i ) {
815307cd1aSmrg        $dec = hex($hex);
825307cd1aSmrg    }
835307cd1aSmrg    elsif ( $arg =~ /^[[:xdigit:]]+$/i ) {
845307cd1aSmrg        $dec = hex($hex);
855307cd1aSmrg    }
865307cd1aSmrg    else {
875307cd1aSmrg        printf STDERR "? not a codepoint: $dec\n";
885307cd1aSmrg        return;
895307cd1aSmrg    }
905307cd1aSmrg    my $chr  = chr($dec);
915307cd1aSmrg    my $type = (
925307cd1aSmrg        $chr =~ /\p{isPrint}/
935307cd1aSmrg        ? (
945307cd1aSmrg            $chr =~ /\p{isAlpha}/
955307cd1aSmrg            ? "alpha"
965307cd1aSmrg            : (
975307cd1aSmrg                $chr =~ /\p{isPunct}/
985307cd1aSmrg                ? "punct"
995307cd1aSmrg                : (
1005307cd1aSmrg                    $chr =~ /\p{isDigit}/
1015307cd1aSmrg                    ? "digit"
1025307cd1aSmrg                    : "printing"
1035307cd1aSmrg                )
1045307cd1aSmrg            )
1055307cd1aSmrg          )
1065307cd1aSmrg        : (
1075307cd1aSmrg            $chr =~ /\p{isCntrl}/
1085307cd1aSmrg            ? "cntrl"
1095307cd1aSmrg            : "nonprinting"
1105307cd1aSmrg        )
1115307cd1aSmrg    );
1125307cd1aSmrg    printf "%d ->%#x ->{%s}%s(%s %s %s)\n", $dec, $dec, $chr,
1135307cd1aSmrg    &pad_column($chr),
1145307cd1aSmrg      &num_bytes($chr),
1155307cd1aSmrg      &num_cells($chr),
1165307cd1aSmrg      $type;
117a5ae21e4Smrg}
118a5ae21e4Smrg
1195307cd1aSmrgbinmode( STDOUT, ":utf8" );
120a5ae21e4Smrgwhile ( $#ARGV >= 0 ) {
1215307cd1aSmrg    vxt_utf8( shift @ARGV );
122a5ae21e4Smrg}
1235307cd1aSmrg
1245307cd1aSmrg1;
125