Ruby 2.7.7p221 (2022-11-24 revision 168ec2b1e5ad0e4688e963d9de019557c78feed9)
encoding.c
Go to the documentation of this file.
1/**********************************************************************
2
3 encoding.c -
4
5 $Author$
6 created at: Thu May 24 17:23:27 JST 2007
7
8 Copyright (C) 2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "ruby/encoding.h"
13#include "internal.h"
14#include "encindex.h"
15#include "regenc.h"
16#include <ctype.h>
17#include "ruby/util.h"
18
19#include "ruby_assert.h"
20#ifndef ENC_DEBUG
21#define ENC_DEBUG 0
22#endif
23#define ENC_ASSERT(expr) RUBY_ASSERT_WHEN(ENC_DEBUG, expr)
24#define MUST_STRING(str) (ENC_ASSERT(RB_TYPE_P(str, T_STRING)), str)
25
26#undef rb_ascii8bit_encindex
27#undef rb_utf8_encindex
28#undef rb_usascii_encindex
29
31
32#if defined __GNUC__ && __GNUC__ >= 4
33#pragma GCC visibility push(default)
34int rb_enc_register(const char *name, rb_encoding *encoding);
35void rb_enc_set_base(const char *name, const char *orig);
36int rb_enc_set_dummy(int index);
37void rb_encdb_declare(const char *name);
38int rb_encdb_replicate(const char *name, const char *orig);
39int rb_encdb_dummy(const char *name);
40int rb_encdb_alias(const char *alias, const char *orig);
42#pragma GCC visibility pop
43#endif
44
45static ID id_encoding;
47static VALUE rb_encoding_list;
48
50 const char *name;
53};
54
55static struct {
57 int count;
58 int size;
60} enc_table;
61
62#define ENC_DUMMY_FLAG (1<<24)
63#define ENC_INDEX_MASK (~(~0U<<24))
64
65#define ENC_TO_ENCINDEX(enc) (int)((enc)->ruby_encoding_index & ENC_INDEX_MASK)
66#define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG)
67#define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG)
68
69#define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
70#define UNSPECIFIED_ENCODING INT_MAX
71
72#define ENCODING_NAMELEN_MAX 63
73#define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
74
75#define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
76
77static int load_encoding(const char *name);
78
79static const rb_data_type_t encoding_data_type = {
80 "encoding",
81 {0, 0, 0,},
83};
84
85#define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
86#define is_obj_encoding(obj) (RB_TYPE_P((obj), T_DATA) && is_data_encoding(obj))
87
88int
90{
91 return is_data_encoding(obj);
92}
93
94static VALUE
95enc_new(rb_encoding *encoding)
96{
97 return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, (void *)encoding);
98}
99
100static VALUE
101rb_enc_from_encoding_index(int idx)
102{
103 VALUE list, enc;
104
105 if (!(list = rb_encoding_list)) {
106 rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
107 }
108 enc = rb_ary_entry(list, idx);
109 if (NIL_P(enc)) {
110 rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
111 }
112 return enc;
113}
114
115VALUE
117{
118 int idx;
119 if (!encoding) return Qnil;
120 idx = ENC_TO_ENCINDEX(encoding);
121 return rb_enc_from_encoding_index(idx);
122}
123
124int
126{
127 return enc ? ENC_TO_ENCINDEX(enc) : 0;
128}
129
130int
132{
133 return ENC_DUMMY_P(enc) != 0;
134}
135
136static int enc_autoload(rb_encoding *);
137
138static int
139check_encoding(rb_encoding *enc)
140{
143 return -1;
144 if (enc_autoload_p(enc)) {
145 index = enc_autoload(enc);
146 }
147 return index;
148}
149
150static int
151enc_check_encoding(VALUE obj)
152{
153 if (!is_obj_encoding(obj)) {
154 return -1;
155 }
156 return check_encoding(RDATA(obj)->data);
157}
158
159NORETURN(static void not_encoding(VALUE enc));
160static void
161not_encoding(VALUE enc)
162{
163 rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Encoding)",
165}
166
167static rb_encoding *
168must_encoding(VALUE enc)
169{
170 int index = enc_check_encoding(enc);
171 if (index < 0) {
172 not_encoding(enc);
173 }
174 return DATA_PTR(enc);
175}
176
177static rb_encoding *
178must_encindex(int index)
179{
181 if (!enc) {
182 rb_raise(rb_eEncodingError, "encoding index out of bound: %d",
183 index);
184 }
185 if (ENC_TO_ENCINDEX(enc) != (int)(index & ENC_INDEX_MASK)) {
186 rb_raise(rb_eEncodingError, "wrong encoding index %d for %s (expected %d)",
188 }
189 if (enc_autoload_p(enc) && enc_autoload(enc) == -1) {
190 rb_loaderror("failed to load encoding (%s)",
192 }
193 return enc;
194}
195
196int
198{
199 int idx;
200
201 idx = enc_check_encoding(enc);
202 if (idx >= 0) {
203 return idx;
204 }
205 else if (NIL_P(enc = rb_check_string_type(enc))) {
206 return -1;
207 }
209 return -1;
210 }
212}
213
214/* Returns encoding index or UNSPECIFIED_ENCODING */
215static int
216str_find_encindex(VALUE enc)
217{
218 int idx;
219
222 rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
223 }
225 return idx;
226}
227
228static int
229str_to_encindex(VALUE enc)
230{
231 int idx = str_find_encindex(enc);
232 if (idx < 0) {
233 rb_raise(rb_eArgError, "unknown encoding name - %"PRIsVALUE, enc);
234 }
235 return idx;
236}
237
238static rb_encoding *
239str_to_encoding(VALUE enc)
240{
241 return rb_enc_from_index(str_to_encindex(enc));
242}
243
246{
247 if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
248 return str_to_encoding(enc);
249}
250
253{
254 int idx;
255 if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
256 idx = str_find_encindex(enc);
257 if (idx < 0) return NULL;
258 return rb_enc_from_index(idx);
259}
260
261static int
262enc_table_expand(int newsize)
263{
264 struct rb_encoding_entry *ent;
265 int count = newsize;
266
267 if (enc_table.size >= newsize) return newsize;
268 newsize = (newsize + 7) / 8 * 8;
269 ent = REALLOC_N(enc_table.list, struct rb_encoding_entry, newsize);
270 memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
271 enc_table.list = ent;
272 enc_table.size = newsize;
273 return count;
274}
275
276static int
277enc_register_at(int index, const char *name, rb_encoding *base_encoding)
278{
279 struct rb_encoding_entry *ent = &enc_table.list[index];
280 rb_raw_encoding *encoding;
281 VALUE list;
282
283 if (!valid_encoding_name_p(name)) return -1;
284 if (!ent->name) {
285 ent->name = name = strdup(name);
286 }
287 else if (STRCASECMP(name, ent->name)) {
288 return -1;
289 }
290 encoding = (rb_raw_encoding *)ent->enc;
291 if (!encoding) {
292 encoding = xmalloc(sizeof(rb_encoding));
293 }
294 if (base_encoding) {
295 *encoding = *base_encoding;
296 }
297 else {
298 memset(encoding, 0, sizeof(*ent->enc));
299 }
300 encoding->name = name;
301 encoding->ruby_encoding_index = index;
302 ent->enc = encoding;
303 st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
304 list = rb_encoding_list;
305 if (list && NIL_P(rb_ary_entry(list, index))) {
306 /* initialize encoding data */
307 rb_ary_store(list, index, enc_new(encoding));
308 }
309 return index;
310}
311
312static int
313enc_register(const char *name, rb_encoding *encoding)
314{
315 int index = enc_table.count;
316
317 if ((index = enc_table_expand(index + 1)) < 0) return -1;
318 enc_table.count = index;
319 return enc_register_at(index - 1, name, encoding);
320}
321
322static void set_encoding_const(const char *, rb_encoding *);
323int rb_enc_registered(const char *name);
324
325int
326rb_enc_register(const char *name, rb_encoding *encoding)
327{
329
330 if (index >= 0) {
332 if (STRCASECMP(name, rb_enc_name(oldenc))) {
333 index = enc_register(name, encoding);
334 }
335 else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
336 enc_register_at(index, name, encoding);
337 }
338 else {
339 rb_raise(rb_eArgError, "encoding %s is already registered", name);
340 }
341 }
342 else {
343 index = enc_register(name, encoding);
344 set_encoding_const(name, rb_enc_from_index(index));
345 }
346 return index;
347}
348
349void
351{
352 int idx = rb_enc_registered(name);
353 if (idx < 0) {
354 idx = enc_register(name, 0);
355 }
356 set_encoding_const(name, rb_enc_from_index(idx));
357}
358
359static void
360enc_check_duplication(const char *name)
361{
362 if (rb_enc_registered(name) >= 0) {
363 rb_raise(rb_eArgError, "encoding %s is already registered", name);
364 }
365}
366
367static rb_encoding*
368set_base_encoding(int index, rb_encoding *base)
369{
370 rb_encoding *enc = enc_table.list[index].enc;
371
372 enc_table.list[index].base = base;
374 return enc;
375}
376
377/* for encdb.h
378 * Set base encoding for encodings which are not replicas
379 * but not in their own files.
380 */
381void
382rb_enc_set_base(const char *name, const char *orig)
383{
384 int idx = rb_enc_registered(name);
385 int origidx = rb_enc_registered(orig);
386 set_base_encoding(idx, rb_enc_from_index(origidx));
387}
388
389/* for encdb.h
390 * Set encoding dummy.
391 */
392int
394{
395 rb_encoding *enc = enc_table.list[index].enc;
396
398 return index;
399}
400
401int
402rb_enc_replicate(const char *name, rb_encoding *encoding)
403{
404 int idx;
405
406 enc_check_duplication(name);
407 idx = enc_register(name, encoding);
408 set_base_encoding(idx, encoding);
409 set_encoding_const(name, rb_enc_from_index(idx));
410 return idx;
411}
412
413/*
414 * call-seq:
415 * enc.replicate(name) -> encoding
416 *
417 * Returns a replicated encoding of _enc_ whose name is _name_.
418 * The new encoding should have the same byte structure as _enc_.
419 * If _name_ is used by another encoding, raise ArgumentError.
420 *
421 */
422static VALUE
423enc_replicate(VALUE encoding, VALUE name)
424{
425 return rb_enc_from_encoding_index(
427 rb_to_encoding(encoding)));
428}
429
430static int
431enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
432{
433 if (idx < 0) {
434 idx = enc_register(name, origenc);
435 }
436 else {
437 idx = enc_register_at(idx, name, origenc);
438 }
439 if (idx >= 0) {
440 set_base_encoding(idx, origenc);
441 set_encoding_const(name, rb_enc_from_index(idx));
442 }
443 else {
444 rb_raise(rb_eArgError, "failed to replicate encoding");
445 }
446 return idx;
447}
448
449int
450rb_encdb_replicate(const char *name, const char *orig)
451{
452 int origidx = rb_enc_registered(orig);
453 int idx = rb_enc_registered(name);
454
455 if (origidx < 0) {
456 origidx = enc_register(orig, 0);
457 }
458 return enc_replicate_with_index(name, rb_enc_from_index(origidx), idx);
459}
460
461int
463{
465 rb_encoding *enc = enc_table.list[index].enc;
466
468 return index;
469}
470
471int
473{
474 int index = enc_replicate_with_index(name, rb_ascii8bit_encoding(),
476 rb_encoding *enc = enc_table.list[index].enc;
477
479 return index;
480}
481
482/*
483 * call-seq:
484 * enc.dummy? -> true or false
485 *
486 * Returns true for dummy encodings.
487 * A dummy encoding is an encoding for which character handling is not properly
488 * implemented.
489 * It is used for stateful encodings.
490 *
491 * Encoding::ISO_2022_JP.dummy? #=> true
492 * Encoding::UTF_8.dummy? #=> false
493 *
494 */
495static VALUE
496enc_dummy_p(VALUE enc)
497{
498 return ENC_DUMMY_P(must_encoding(enc)) ? Qtrue : Qfalse;
499}
500
501/*
502 * call-seq:
503 * enc.ascii_compatible? -> true or false
504 *
505 * Returns whether ASCII-compatible or not.
506 *
507 * Encoding::UTF_8.ascii_compatible? #=> true
508 * Encoding::UTF_16BE.ascii_compatible? #=> false
509 *
510 */
511static VALUE
512enc_ascii_compatible_p(VALUE enc)
513{
514 return rb_enc_asciicompat(must_encoding(enc)) ? Qtrue : Qfalse;
515}
516
517/*
518 * Returns non-zero when the encoding is Unicode series other than UTF-7 else 0.
519 */
520int
522{
523 return ONIGENC_IS_UNICODE(enc);
524}
525
526static st_data_t
527enc_dup_name(st_data_t name)
528{
529 return (st_data_t)strdup((const char *)name);
530}
531
532/*
533 * Returns copied alias name when the key is added for st_table,
534 * else returns NULL.
535 */
536static int
537enc_alias_internal(const char *alias, int idx)
538{
539 return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
540 enc_dup_name);
541}
542
/*
 * Adds alias as another name for the encoding at idx.
 * Returns -1 when alias fails valid_encoding_name_p(); otherwise idx.
 * The Encoding constant for alias is defined only when
 * enc_alias_internal() returns zero.
 */
static int
enc_alias(const char *alias, int idx)
{
    if (!valid_encoding_name_p(alias)) return -1;
    if (enc_alias_internal(alias, idx) == 0) {
        set_encoding_const(alias, rb_enc_from_index(idx));
    }
    return idx;
}
551
/*
 * Defines alias as an alternative name for the encoding named orig.
 * Raises ArgumentError when alias is already registered.  Returns -1 when
 * orig cannot be found by rb_enc_find_index(), otherwise the result of
 * enc_alias().
 */
int
rb_enc_alias(const char *alias, const char *orig)
{
    int target;

    enc_check_duplication(alias);
    target = rb_enc_find_index(orig);
    return target < 0 ? -1 : enc_alias(alias, target);
}
563
/*
 * Defines alias for the encoding named orig.  When orig is not
 * registered yet, it is first registered with no encoding data.
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    int target = rb_enc_registered(orig);

    if (target < 0) {
        target = enc_register(orig, 0);
    }
    return enc_alias(alias, target);
}
574
575void
577{
579}
580
581void
583{
584 enc_table_expand(ENCODING_COUNT + 1);
585 if (!enc_table.names) {
586 enc_table.names = st_init_strcasetable();
587 }
588#define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
591 ENC_REGISTER(US_ASCII);
592#undef ENC_REGISTER
593#define ENCDB_REGISTER(name, enc) enc_register_at(ENCINDEX_##enc, name, NULL)
594 ENCDB_REGISTER("UTF-16BE", UTF_16BE);
595 ENCDB_REGISTER("UTF-16LE", UTF_16LE);
596 ENCDB_REGISTER("UTF-32BE", UTF_32BE);
597 ENCDB_REGISTER("UTF-32LE", UTF_32LE);
598 ENCDB_REGISTER("UTF-16", UTF_16);
599 ENCDB_REGISTER("UTF-32", UTF_32);
600 ENCDB_REGISTER("UTF8-MAC", UTF8_MAC);
601
602 ENCDB_REGISTER("EUC-JP", EUC_JP);
603 ENCDB_REGISTER("Windows-31J", Windows_31J);
604#undef ENCDB_REGISTER
605 enc_table.count = ENCINDEX_BUILTIN_MAX;
606}
607
610{
611 if (UNLIKELY(index < 0 || enc_table.count <= (index &= ENC_INDEX_MASK))) {
612 return 0;
613 }
614 return enc_table.list[index].enc;
615}
616
619{
620 return must_encindex(index);
621}
622
623int
625{
626 st_data_t idx = 0;
627
628 if (!name) return -1;
629 if (!enc_table.list) return -1;
630 if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
631 return (int)idx;
632 }
633 return -1;
634}
635
636static int
637load_encoding(const char *name)
638{
639 VALUE enclib = rb_sprintf("enc/%s.so", name);
642 VALUE errinfo;
643 char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
644 int loaded;
645 int idx;
646
647 while (s < e) {
648 if (!ISALNUM(*s)) *s = '_';
649 else if (ISUPPER(*s)) *s = (char)TOLOWER(*s);
650 ++s;
651 }
652 enclib = rb_fstring(enclib);
655 errinfo = rb_errinfo();
656 loaded = rb_require_internal(enclib);
659 rb_set_errinfo(errinfo);
660 if (loaded < 0 || 1 < loaded) return -1;
661 if ((idx = rb_enc_registered(name)) < 0) return -1;
662 if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
663 return idx;
664}
665
666static int
667enc_autoload(rb_encoding *enc)
668{
669 int i;
670 rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
671
672 if (base) {
673 i = 0;
674 do {
675 if (i >= enc_table.count) return -1;
676 } while (enc_table.list[i].enc != base && (++i, 1));
677 if (enc_autoload_p(base)) {
678 if (enc_autoload(base) < 0) return -1;
679 }
681 enc_register_at(i & ENC_INDEX_MASK, rb_enc_name(enc), base);
682 ((rb_raw_encoding *)enc)->ruby_encoding_index = i;
683 i &= ENC_INDEX_MASK;
684 }
685 else {
686 i = load_encoding(rb_enc_name(enc));
687 }
688 return i;
689}
690
691/* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
692int
694{
695 int i = rb_enc_registered(name);
697
698 if (i < 0) {
699 i = load_encoding(name);
700 }
701 else if (!(enc = rb_enc_from_index(i))) {
702 if (i != UNSPECIFIED_ENCODING) {
703 rb_raise(rb_eArgError, "encoding %s is not registered", name);
704 }
705 }
706 else if (enc_autoload_p(enc)) {
707 if (enc_autoload(enc) < 0) {
708 rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
709 name);
710 return 0;
711 }
712 }
713 return i;
714}
715
716int
717rb_enc_find_index2(const char *name, long len)
718{
720
721 if (len > ENCODING_NAMELEN_MAX) return -1;
722 memcpy(buf, name, len);
723 buf[len] = '\0';
724 return rb_enc_find_index(buf);
725}
726
728rb_enc_find(const char *name)
729{
730 int idx = rb_enc_find_index(name);
731 if (idx < 0) idx = 0;
732 return rb_enc_from_index(idx);
733}
734
735static inline int
736enc_capable(VALUE obj)
737{
738 if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
739 switch (BUILTIN_TYPE(obj)) {
740 case T_STRING:
741 case T_REGEXP:
742 case T_FILE:
743 case T_SYMBOL:
744 return TRUE;
745 case T_DATA:
746 if (is_data_encoding(obj)) return TRUE;
747 default:
748 return FALSE;
749 }
750}
751
752int
754{
755 return enc_capable(obj);
756}
757
758ID
760{
761 CONST_ID(id_encoding, "encoding");
762 return id_encoding;
763}
764
765static int
766enc_get_index_str(VALUE str)
767{
769 if (i == ENCODING_INLINE_MAX) {
770 VALUE iv;
771
773 i = NUM2INT(iv);
774 }
775 return i;
776}
777
778int
780{
781 int i = -1;
782 VALUE tmp;
783
784 if (SPECIAL_CONST_P(obj)) {
785 if (!SYMBOL_P(obj)) return -1;
786 obj = rb_sym2str(obj);
787 }
788 switch (BUILTIN_TYPE(obj)) {
789 case T_STRING:
790 case T_SYMBOL:
791 case T_REGEXP:
792 i = enc_get_index_str(obj);
793 break;
794 case T_FILE:
795 tmp = rb_funcallv(obj, rb_intern("internal_encoding"), 0, 0);
796 if (NIL_P(tmp)) {
797 tmp = rb_funcallv(obj, rb_intern("external_encoding"), 0, 0);
798 }
799 if (is_obj_encoding(tmp)) {
800 i = enc_check_encoding(tmp);
801 }
802 break;
803 case T_DATA:
804 if (is_data_encoding(obj)) {
805 i = enc_check_encoding(obj);
806 }
807 break;
808 default:
809 break;
810 }
811 return i;
812}
813
814static void
815enc_set_index(VALUE obj, int idx)
816{
817 if (!enc_capable(obj)) {
818 rb_raise(rb_eArgError, "cannot set encoding on non-encoding capable object");
819 }
820
821 if (idx < ENCODING_INLINE_MAX) {
823 return;
824 }
827}
828
829void
831{
833 must_encindex(idx);
834 enc_set_index(obj, idx);
835}
836
837VALUE
839{
841 int oldidx, oldtermlen, termlen;
842
843/* enc_check_capable(obj);*/
845 oldidx = rb_enc_get_index(obj);
846 if (oldidx == idx)
847 return obj;
848 if (SPECIAL_CONST_P(obj)) {
849 rb_raise(rb_eArgError, "cannot set encoding");
850 }
851 enc = must_encindex(idx);
855 }
856 termlen = rb_enc_mbminlen(enc);
857 oldtermlen = rb_enc_mbminlen(rb_enc_from_index(oldidx));
858 if (oldtermlen != termlen && RB_TYPE_P(obj, T_STRING)) {
859 rb_str_change_terminator_length(obj, oldtermlen, termlen);
860 }
861 enc_set_index(obj, idx);
862 return obj;
863}
864
865VALUE
867{
869}
870
873{
875}
876
877static rb_encoding* enc_compatible_str(VALUE str1, VALUE str2);
878
881{
882 rb_encoding *enc = enc_compatible_str(MUST_STRING(str1), MUST_STRING(str2));
883 if (!enc)
884 rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
885 rb_enc_name(rb_enc_get(str1)),
886 rb_enc_name(rb_enc_get(str2)));
887 return enc;
888}
889
892{
893 rb_encoding *enc = rb_enc_compatible(str1, str2);
894 if (!enc)
895 rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
896 rb_enc_name(rb_enc_get(str1)),
897 rb_enc_name(rb_enc_get(str2)));
898 return enc;
899}
900
901static rb_encoding*
902enc_compatible_latter(VALUE str1, VALUE str2, int idx1, int idx2)
903{
904 int isstr1, isstr2;
905 rb_encoding *enc1 = rb_enc_from_index(idx1);
906 rb_encoding *enc2 = rb_enc_from_index(idx2);
907
908 isstr2 = RB_TYPE_P(str2, T_STRING);
909 if (isstr2 && RSTRING_LEN(str2) == 0)
910 return enc1;
911 isstr1 = RB_TYPE_P(str1, T_STRING);
912 if (isstr1 && RSTRING_LEN(str1) == 0)
913 return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
914 if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
915 return 0;
916 }
917
918 /* objects whose encoding is the same of contents */
919 if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
920 return enc1;
921 if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
922 return enc2;
923
924 if (!isstr1) {
925 VALUE tmp = str1;
926 int idx0 = idx1;
927 str1 = str2;
928 str2 = tmp;
929 idx1 = idx2;
930 idx2 = idx0;
931 idx0 = isstr1;
932 isstr1 = isstr2;
933 isstr2 = idx0;
934 }
935 if (isstr1) {
936 int cr1, cr2;
937
938 cr1 = rb_enc_str_coderange(str1);
939 if (isstr2) {
940 cr2 = rb_enc_str_coderange(str2);
941 if (cr1 != cr2) {
942 /* may need to handle ENC_CODERANGE_BROKEN */
943 if (cr1 == ENC_CODERANGE_7BIT) return enc2;
944 if (cr2 == ENC_CODERANGE_7BIT) return enc1;
945 }
946 if (cr2 == ENC_CODERANGE_7BIT) {
947 return enc1;
948 }
949 }
950 if (cr1 == ENC_CODERANGE_7BIT)
951 return enc2;
952 }
953 return 0;
954}
955
956static rb_encoding*
957enc_compatible_str(VALUE str1, VALUE str2)
958{
959 int idx1 = enc_get_index_str(str1);
960 int idx2 = enc_get_index_str(str2);
961
962 if (idx1 < 0 || idx2 < 0)
963 return 0;
964
965 if (idx1 == idx2) {
966 return rb_enc_from_index(idx1);
967 }
968 else {
969 return enc_compatible_latter(str1, str2, idx1, idx2);
970 }
971}
972
975{
976 int idx1 = rb_enc_get_index(str1);
977 int idx2 = rb_enc_get_index(str2);
978
979 if (idx1 < 0 || idx2 < 0)
980 return 0;
981
982 if (idx1 == idx2) {
983 return rb_enc_from_index(idx1);
984 }
985
986 return enc_compatible_latter(str1, str2, idx1, idx2);
987}
988
989void
991{
993}
994
995
996/*
997 * call-seq:
998 * obj.encoding -> encoding
999 *
1000 * Returns the Encoding object that represents the encoding of obj.
1001 */
1002
1003VALUE
1005{
1006 int idx = rb_enc_get_index(obj);
1007 if (idx < 0) {
1008 rb_raise(rb_eTypeError, "unknown encoding");
1009 }
1010 return rb_enc_from_encoding_index(idx & ENC_INDEX_MASK);
1011}
1012
1013int
1014rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
1015{
1016 return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
1017}
1018
1019int
1020rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
1021{
1022 int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
1024 return MBCLEN_CHARFOUND_LEN(n);
1025 else {
1026 int min = rb_enc_mbminlen(enc);
1027 return min <= e-p ? min : (int)(e-p);
1028 }
1029}
1030
1031int
1032rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
1033{
1034 int n;
1035 if (e <= p)
1038 if (e-p < n)
1039 return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
1040 return n;
1041}
1042
1043int
1044rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
1045{
1046 unsigned int c;
1047 int l;
1048 if (e <= p)
1049 return -1;
1050 if (rb_enc_asciicompat(enc)) {
1051 c = (unsigned char)*p;
1052 if (!ISASCII(c))
1053 return -1;
1054 if (len) *len = 1;
1055 return c;
1056 }
1057 l = rb_enc_precise_mbclen(p, e, enc);
1058 if (!MBCLEN_CHARFOUND_P(l))
1059 return -1;
1060 c = rb_enc_mbc_to_codepoint(p, e, enc);
1061 if (!rb_enc_isascii(c, enc))
1062 return -1;
1063 if (len) *len = l;
1064 return c;
1065}
1066
1067unsigned int
1068rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
1069{
1070 int r;
1071 if (e <= p)
1072 rb_raise(rb_eArgError, "empty string");
1073 r = rb_enc_precise_mbclen(p, e, enc);
1074 if (!MBCLEN_CHARFOUND_P(r)) {
1075 rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
1076 }
1077 if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
1078 return rb_enc_mbc_to_codepoint(p, e, enc);
1079}
1080
1081#undef rb_enc_codepoint
1082unsigned int
1083rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
1084{
1085 return rb_enc_codepoint_len(p, e, 0, enc);
1086}
1087
1088int
1090{
1091 int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
1092 if (n == 0) {
1093 rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
1094 }
1095 return n;
1096}
1097
1098#undef rb_enc_code_to_mbclen
1099int
1101{
1102 return ONIGENC_CODE_TO_MBCLEN(enc, code);
1103}
1104
1105int
1107{
1109}
1110
1111int
1113{
1115}
1116
1117/*
1118 * call-seq:
1119 * enc.inspect -> string
1120 *
1121 * Returns a string which represents the encoding for programmers.
1122 *
1123 * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
1124 * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
1125 */
1126static VALUE
1127enc_inspect(VALUE self)
1128{
1130
1131 if (!is_data_encoding(self)) {
1132 not_encoding(self);
1133 }
1134 if (!(enc = DATA_PTR(self)) || rb_enc_from_index(rb_enc_to_index(enc)) != enc) {
1135 rb_raise(rb_eTypeError, "broken Encoding");
1136 }
1138 "#<%"PRIsVALUE":%s%s%s>", rb_obj_class(self),
1140 (ENC_DUMMY_P(enc) ? " (dummy)" : ""),
1141 enc_autoload_p(enc) ? " (autoload)" : "");
1142}
1143
1144/*
1145 * call-seq:
1146 * enc.name -> string
1147 * enc.to_s -> string
1148 *
1149 * Returns the name of the encoding.
1150 *
1151 * Encoding::UTF_8.name #=> "UTF-8"
1152 */
1153static VALUE
1154enc_name(VALUE self)
1155{
1157}
1158
1159static int
1160enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
1161{
1162 VALUE *arg = (VALUE *)args;
1163
1164 if ((int)idx == (int)arg[0]) {
1165 VALUE str = rb_fstring_cstr((char *)name);
1166 rb_ary_push(arg[1], str);
1167 }
1168 return ST_CONTINUE;
1169}
1170
1171/*
1172 * call-seq:
1173 * enc.names -> array
1174 *
1175 * Returns the list of name and aliases of the encoding.
1176 *
1177 * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J", "SJIS", "PCK"]
1178 */
1179static VALUE
1180enc_names(VALUE self)
1181{
1182 VALUE args[2];
1183
1184 args[0] = (VALUE)rb_to_encoding_index(self);
1185 args[1] = rb_ary_new2(0);
1186 st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
1187 return args[1];
1188}
1189
1190/*
1191 * call-seq:
1192 * Encoding.list -> [enc1, enc2, ...]
1193 *
1194 * Returns the list of loaded encodings.
1195 *
1196 * Encoding.list
1197 * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1198 * #<Encoding:ISO-2022-JP (dummy)>]
1199 *
1200 * Encoding.find("US-ASCII")
1201 * #=> #<Encoding:US-ASCII>
1202 *
1203 * Encoding.list
1204 * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1205 * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1206 *
1207 */
1208static VALUE
1209enc_list(VALUE klass)
1210{
1211 VALUE ary = rb_ary_new2(0);
1212 rb_ary_replace(ary, rb_encoding_list);
1213 return ary;
1214}
1215
1216/*
1217 * call-seq:
1218 * Encoding.find(string) -> enc
1219 *
1220 * Search the encoding with specified <i>name</i>.
1221 * <i>name</i> should be a string.
1222 *
1223 * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1224 *
1225 * Names which this method accepts are encoding names and aliases,
1226 * including the following special aliases:
1227 *
1228 * "external":: default external encoding
1229 * "internal":: default internal encoding
1230 * "locale":: locale encoding
1231 * "filesystem":: filesystem encoding
1232 *
1233 * An ArgumentError is raised when there is no encoding with <i>name</i>.
1234 * Only <code>Encoding.find("internal")</code> however returns nil
1235 * when no encoding named "internal", in other words, when Ruby has no
1236 * default internal encoding.
1237 */
1238static VALUE
1239enc_find(VALUE klass, VALUE enc)
1240{
1241 int idx;
1242 if (is_obj_encoding(enc))
1243 return enc;
1244 idx = str_to_encindex(enc);
1245 if (idx == UNSPECIFIED_ENCODING) return Qnil;
1246 return rb_enc_from_encoding_index(idx);
1247}
1248
1249/*
1250 * call-seq:
1251 * Encoding.compatible?(obj1, obj2) -> enc or nil
1252 *
1253 * Checks the compatibility of two objects.
1254 *
1255 * If the objects are both strings they are compatible when they are
1256 * concatenatable. The encoding of the concatenated string will be returned
1257 * if they are compatible, nil if they are not.
1258 *
1259 * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1260 * #=> #<Encoding:ISO-8859-1>
1261 *
1262 * Encoding.compatible?(
1263 * "\xa1".force_encoding("iso-8859-1"),
1264 * "\xa1\xa1".force_encoding("euc-jp"))
1265 * #=> nil
1266 *
1267 * If the objects are non-strings their encodings are compatible when they
1268 * have an encoding and:
1269 * * Either encoding is US-ASCII compatible
1270 * * One of the encodings is a 7-bit encoding
1271 *
1272 */
1273static VALUE
1274enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
1275{
1277
1278 if (!enc_capable(str1)) return Qnil;
1279 if (!enc_capable(str2)) return Qnil;
1280 enc = rb_enc_compatible(str1, str2);
1281 if (!enc) return Qnil;
1282 return rb_enc_from_encoding(enc);
1283}
1284
1285/* :nodoc: */
1286static VALUE
1287enc_s_alloc(VALUE klass)
1288{
1289 rb_undefined_alloc(klass);
1290 return Qnil;
1291}
1292
1293/* :nodoc: */
1294static VALUE
1295enc_dump(int argc, VALUE *argv, VALUE self)
1296{
1297 rb_check_arity(argc, 0, 1);
1298 return enc_name(self);
1299}
1300
1301/* :nodoc: */
1302static VALUE
1303enc_load(VALUE klass, VALUE str)
1304{
1305 return str;
1306}
1307
1308/* :nodoc: */
1309static VALUE
1310enc_m_loader(VALUE klass, VALUE str)
1311{
1312 return enc_find(klass, str);
1313}
1314
1317{
1318 return enc_table.list[ENCINDEX_ASCII].enc;
1319}
1320
1321int
1323{
1324 return ENCINDEX_ASCII;
1325}
1326
1329{
1330 return enc_table.list[ENCINDEX_UTF_8].enc;
1331}
1332
1333int
1335{
1336 return ENCINDEX_UTF_8;
1337}
1338
1341{
1342 return enc_table.list[ENCINDEX_US_ASCII].enc;
1343}
1344
1345int
1347{
1348 return ENCINDEX_US_ASCII;
1349}
1350
1351int rb_locale_charmap_index(void);
1352
1353int
1355{
1356 int idx = rb_locale_charmap_index();
1357
1358 if (idx < 0) idx = ENCINDEX_ASCII;
1359
1360 if (rb_enc_registered("locale") < 0) {
1361# if defined _WIN32
1362 void Init_w32_codepage(void);
1364# endif
1365 enc_alias_internal("locale", idx);
1366 }
1367
1368 return idx;
1369}
1370
1373{
1375}
1376
1377int
1379{
1380 int idx = rb_enc_registered("filesystem");
1381 if (idx < 0)
1382 idx = ENCINDEX_ASCII;
1383 return idx;
1384}
1385
1388{
1390}
1391
1393 int index; /* -2 => not yet set, -1 => nil */
1395};
1396
1397static struct default_encoding default_external = {0};
1398
1399static int
1400enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
1401{
1402 int overridden = FALSE;
1403
1404 if (def->index != -2)
1405 /* Already set */
1406 overridden = TRUE;
1407
1408 if (NIL_P(encoding)) {
1409 def->index = -1;
1410 def->enc = 0;
1411 st_insert(enc_table.names, (st_data_t)strdup(name),
1413 }
1414 else {
1415 def->index = rb_enc_to_index(rb_to_encoding(encoding));
1416 def->enc = 0;
1417 enc_alias_internal(name, def->index);
1418 }
1419
1420 if (def == &default_external)
1421 enc_alias_internal("filesystem", Init_enc_set_filesystem_encoding());
1422
1423 return overridden;
1424}
1425
1428{
1429 if (default_external.enc) return default_external.enc;
1430
1431 if (default_external.index >= 0) {
1432 default_external.enc = rb_enc_from_index(default_external.index);
1433 return default_external.enc;
1434 }
1435 else {
1436 return rb_locale_encoding();
1437 }
1438}
1439
1440VALUE
1442{
1444}
1445
1446/*
1447 * call-seq:
1448 * Encoding.default_external -> enc
1449 *
1450 * Returns default external encoding.
1451 *
1452 * The default external encoding is used by default for strings created from
1453 * the following locations:
1454 *
1455 * * CSV
1456 * * File data read from disk
1457 * * SDBM
1458 * * StringIO
1459 * * Zlib::GzipReader
1460 * * Zlib::GzipWriter
1461 * * String#inspect
1462 * * Regexp#inspect
1463 *
1464 * While strings created from these locations will have this encoding, the
1465 * encoding may not be valid. Be sure to check String#valid_encoding?.
1466 *
1467 * File data written to disk will be transcoded to the default external
1468 * encoding when written.
1469 *
1470 * The default external encoding is initialized by the locale or -E option.
1471 */
1472static VALUE
1473get_default_external(VALUE klass)
1474{
1475 return rb_enc_default_external();
1476}
1477
1478void
1480{
1481 if (NIL_P(encoding)) {
1482 rb_raise(rb_eArgError, "default external can not be nil");
1483 }
1484 enc_set_default_encoding(&default_external, encoding,
1485 "external");
1486}
1487
1488/*
1489 * call-seq:
1490 * Encoding.default_external = enc
1491 *
1492 * Sets default external encoding. You should not set
1493 * Encoding::default_external in ruby code as strings created before changing
1494 * the value may have a different encoding from strings created after the value
1495 * was changed. Instead, you should use <tt>ruby -E</tt> to invoke ruby with
1496 * the correct default_external.
1497 *
1498 * See Encoding::default_external for information on how the default external
1499 * encoding is used.
1500 */
1501static VALUE
1502set_default_external(VALUE klass, VALUE encoding)
1503{
1504 rb_warning("setting Encoding.default_external");
1506 return encoding;
1507}
1508
1509static struct default_encoding default_internal = {-2};
1510
1513{
1514 if (!default_internal.enc && default_internal.index >= 0) {
1515 default_internal.enc = rb_enc_from_index(default_internal.index);
1516 }
1517 return default_internal.enc; /* can be NULL */
1518}
1519
1520VALUE
1522{
1523 /* Note: These functions cope with default_internal not being set */
1525}
1526
1527/*
1528 * call-seq:
1529 * Encoding.default_internal -> enc
1530 *
1531 * Returns default internal encoding. Strings will be transcoded to the
1532 * default internal encoding in the following places if the default internal
1533 * encoding is not nil:
1534 *
1535 * * CSV
1536 * * Etc.sysconfdir and Etc.systmpdir
1537 * * File data read from disk
1538 * * File names from Dir
1539 * * Integer#chr
1540 * * String#inspect and Regexp#inspect
1541 * * Strings returned from Readline
1542 * * Strings returned from SDBM
1543 * * Time#zone
1544 * * Values from ENV
1545 * * Values in ARGV including $PROGRAM_NAME
1546 *
1547 * Additionally String#encode and String#encode! use the default internal
1548 * encoding if no encoding is given.
1549 *
1550 * The script encoding (__ENCODING__), not default_internal, is used as the
1551 * encoding of created strings.
1552 *
1553 * Encoding::default_internal is initialized by the source file's
1554 * internal_encoding or -E option.
1555 */
1556static VALUE
1557get_default_internal(VALUE klass)
1558{
1559 return rb_enc_default_internal();
1560}
1561
1562void
1564{
1565 enc_set_default_encoding(&default_internal, encoding,
1566 "internal");
1567}
1568
1569/*
1570 * call-seq:
1571 * Encoding.default_internal = enc or nil
1572 *
1573 * Sets default internal encoding or removes default internal encoding when
1574 * passed nil. You should not set Encoding::default_internal in ruby code as
1575 * strings created before changing the value may have a different encoding
1576 * from strings created after the change. Instead you should use
1577 * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1578 *
1579 * See Encoding::default_internal for information on how the default internal
1580 * encoding is used.
1581 */
1582static VALUE
1583set_default_internal(VALUE klass, VALUE encoding)
1584{
1585 rb_warning("setting Encoding.default_internal");
1587 return encoding;
1588}
1589
1590static void
1591set_encoding_const(const char *name, rb_encoding *enc)
1592{
1593 VALUE encoding = rb_enc_from_encoding(enc);
1594 char *s = (char *)name;
1595 int haslower = 0, hasupper = 0, valid = 0;
1596
1597 if (ISDIGIT(*s)) return;
1598 if (ISUPPER(*s)) {
1599 hasupper = 1;
1600 while (*++s && (ISALNUM(*s) || *s == '_')) {
1601 if (ISLOWER(*s)) haslower = 1;
1602 }
1603 }
1604 if (!*s) {
1605 if (s - name > ENCODING_NAMELEN_MAX) return;
1606 valid = 1;
1607 rb_define_const(rb_cEncoding, name, encoding);
1608 }
1609 if (!valid || haslower) {
1610 size_t len = s - name;
1611 if (len > ENCODING_NAMELEN_MAX) return;
1612 if (!haslower || !hasupper) {
1613 do {
1614 if (ISLOWER(*s)) haslower = 1;
1615 if (ISUPPER(*s)) hasupper = 1;
1616 } while (*++s && (!haslower || !hasupper));
1617 len = s - name;
1618 }
1619 len += strlen(s);
1620 if (len++ > ENCODING_NAMELEN_MAX) return;
1621 MEMCPY(s = ALLOCA_N(char, len), name, char, len);
1622 name = s;
1623 if (!valid) {
1624 if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1625 for (; *s; ++s) {
1626 if (!ISALNUM(*s)) *s = '_';
1627 }
1628 if (hasupper) {
1629 rb_define_const(rb_cEncoding, name, encoding);
1630 }
1631 }
1632 if (haslower) {
1633 for (s = (char *)name; *s; ++s) {
1634 if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1635 }
1636 rb_define_const(rb_cEncoding, name, encoding);
1637 }
1638 }
1639}
1640
1641static int
1642rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
1643{
1644 VALUE ary = (VALUE)arg;
1645 VALUE str = rb_fstring_cstr((char *)name);
1646 rb_ary_push(ary, str);
1647 return ST_CONTINUE;
1648}
1649
1650/*
1651 * call-seq:
1652 * Encoding.name_list -> ["enc1", "enc2", ...]
1653 *
1654 * Returns the list of available encoding names.
1655 *
1656 * Encoding.name_list
1657 * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1658 * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1659 * "Windows-31J",
1660 * "BINARY", "CP932", "eucJP"]
1661 *
1662 */
1663
1664static VALUE
1665rb_enc_name_list(VALUE klass)
1666{
1667 VALUE ary = rb_ary_new2(enc_table.names->num_entries);
1668 st_foreach(enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
1669 return ary;
1670}
1671
1672static int
1673rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
1674{
1675 VALUE *p = (VALUE *)arg;
1676 VALUE aliases = p[0], ary = p[1];
1677 int idx = (int)orig;
1678 VALUE key, str = rb_ary_entry(ary, idx);
1679
1680 if (NIL_P(str)) {
1682
1683 if (!enc) return ST_CONTINUE;
1684 if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
1685 return ST_CONTINUE;
1686 }
1688 rb_ary_store(ary, idx, str);
1689 }
1690 key = rb_fstring_cstr((char *)name);
1691 rb_hash_aset(aliases, key, str);
1692 return ST_CONTINUE;
1693}
1694
1695/*
1696 * call-seq:
1697 * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1698 *
1699 * Returns the hash of available encoding alias and original encoding name.
1700 *
1701 * Encoding.aliases
1702 * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
1703 * "SJIS"=>"Windows-31J", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
1704 *
1705 */
1706
1707static VALUE
1708rb_enc_aliases(VALUE klass)
1709{
1710 VALUE aliases[2];
1711 aliases[0] = rb_hash_new();
1712 aliases[1] = rb_ary_new();
1713 st_foreach(enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
1714 return aliases[0];
1715}
1716
1717/*
1718 * An Encoding instance represents a character encoding usable in Ruby. It is
1719 * defined as a constant under the Encoding namespace. It has a name and
1720 * optionally, aliases:
1721 *
1722 * Encoding::ISO_8859_1.name
1723 * #=> "ISO-8859-1"
1724 *
1725 * Encoding::ISO_8859_1.names
1726 * #=> ["ISO-8859-1", "ISO8859-1"]
1727 *
1728 * Ruby methods dealing with encodings return or accept Encoding instances as
1729 * arguments (when a method accepts an Encoding instance as an argument, it
1730 * can be passed an Encoding name or alias instead).
1731 *
1732 * "some string".encoding
1733 * #=> #<Encoding:UTF-8>
1734 *
1735 * string = "some string".encode(Encoding::ISO_8859_1)
1736 * #=> "some string"
1737 * string.encoding
1738 * #=> #<Encoding:ISO-8859-1>
1739 *
1740 * "some string".encode "ISO-8859-1"
1741 * #=> "some string"
1742 *
1743 * Encoding::ASCII_8BIT is a special encoding that is usually used for
1744 * a byte string, not a character string. But as the name insists, its
1745 * characters in the range of ASCII are considered as ASCII
1746 * characters. This is useful when you use ASCII-8BIT characters with
1747 * other ASCII compatible characters.
1748 *
1749 * == Changing an encoding
1750 *
1751 * The associated Encoding of a String can be changed in two different ways.
1752 *
1753 * First, it is possible to set the Encoding of a string to a new Encoding
1754 * without changing the internal byte representation of the string, with
1755 * String#force_encoding. This is how you can tell Ruby the correct encoding
1756 * of a string.
1757 *
1758 * string
1759 * #=> "R\xC3\xA9sum\xC3\xA9"
1760 * string.encoding
1761 * #=> #<Encoding:ISO-8859-1>
1762 * string.force_encoding(Encoding::UTF_8)
1763 * #=> "R\u00E9sum\u00E9"
1764 *
1765 * Second, it is possible to transcode a string, i.e. translate its internal
1766 * byte representation to another encoding. Its associated encoding is also
1767 * set to the other encoding. See String#encode for the various forms of
1768 * transcoding, and the Encoding::Converter class for additional control over
1769 * the transcoding process.
1770 *
1771 * string
1772 * #=> "R\u00E9sum\u00E9"
1773 * string.encoding
1774 * #=> #<Encoding:UTF-8>
1775 * string = string.encode!(Encoding::ISO_8859_1)
1776 * #=> "R\xE9sum\xE9"
1777 * string.encoding
1778 * #=> #<Encoding:ISO-8859-1>
1779 *
1780 * == Script encoding
1781 *
1782 * All Ruby script code has an associated Encoding which any String literal
1783 * created in the source code will be associated to.
1784 *
1785 * The default script encoding is Encoding::UTF_8 after v2.0, but it
1786 * can be changed by a magic comment on the first line of the source
1787 * code file (or second line, if there is a shebang line on the
1788 * first). The comment must contain the word <code>coding</code> or
1789 * <code>encoding</code>, followed by a colon, space and the Encoding
1790 * name or alias:
1791 *
1792 * # encoding: UTF-8
1793 *
1794 * "some string".encoding
1795 * #=> #<Encoding:UTF-8>
1796 *
1797 * The <code>__ENCODING__</code> keyword returns the script encoding of the file
1798 * in which the keyword is written:
1799 *
1800 * # encoding: ISO-8859-1
1801 *
1802 * __ENCODING__
1803 * #=> #<Encoding:ISO-8859-1>
1804 *
1805 * <code>ruby -K</code> will change the default locale encoding, but this is
1806 * not recommended. Ruby source files should declare their script encoding by a
1807 * magic comment even when they only depend on US-ASCII strings or regular
1808 * expressions.
1809 *
1810 * == Locale encoding
1811 *
1812 * The default encoding of the environment. Usually derived from locale.
1813 *
1814 * see Encoding.locale_charmap, Encoding.find('locale')
1815 *
1816 * == Filesystem encoding
1817 *
1818 * The default encoding of strings from the filesystem of the environment.
1819 * This is used for strings of file names or paths.
1820 *
1821 * see Encoding.find('filesystem')
1822 *
1823 * == External encoding
1824 *
1825 * Each IO object has an external encoding which indicates the encoding that
1826 * Ruby will use to read its data. By default Ruby sets the external encoding
1827 * of an IO object to the default external encoding. The default external
1828 * encoding is set by locale encoding or the interpreter <code>-E</code> option.
1829 * Encoding.default_external returns the current value of the external
1830 * encoding.
1831 *
1832 * ENV["LANG"]
1833 * #=> "UTF-8"
1834 * Encoding.default_external
1835 * #=> #<Encoding:UTF-8>
1836 *
1837 * $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
1838 * #<Encoding:ISO-8859-1>
1839 *
1840 * $ LANG=C ruby -e 'p Encoding.default_external'
1841 * #<Encoding:US-ASCII>
1842 *
1843 * The default external encoding may also be set through
1844 * Encoding.default_external=, but you should not do this as strings created
1845 * before and after the change will have inconsistent encodings. Instead use
1846 * <code>ruby -E</code> to invoke ruby with the correct external encoding.
1847 *
1848 * When you know that the actual encoding of the data of an IO object is not
1849 * the default external encoding, you can reset its external encoding with
1850 * IO#set_encoding or set it at IO object creation (see IO.new options).
1851 *
1852 * == Internal encoding
1853 *
1854 * To process the data of an IO object which has an encoding different
1855 * from its external encoding, you can set its internal encoding. Ruby will use
1856 * this internal encoding to transcode the data when it is read from the IO
1857 * object.
1858 *
1859 * Conversely, when data is written to the IO object it is transcoded from the
1860 * internal encoding to the external encoding of the IO object.
1861 *
1862 * The internal encoding of an IO object can be set with
1863 * IO#set_encoding or at IO object creation (see IO.new options).
1864 *
1865 * The internal encoding is optional and when not set, the Ruby default
1866 * internal encoding is used. If not explicitly set this default internal
1867 * encoding is +nil+ meaning that by default, no transcoding occurs.
1868 *
1869 * The default internal encoding can be set with the interpreter option
1870 * <code>-E</code>. Encoding.default_internal returns the current internal
1871 * encoding.
1872 *
1873 * $ ruby -e 'p Encoding.default_internal'
1874 * nil
1875 *
1876 * $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
1877 * Encoding.default_internal]"
1878 * [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
1879 *
1880 * The default internal encoding may also be set through
1881 * Encoding.default_internal=, but you should not do this as strings created
1882 * before and after the change will have inconsistent encodings. Instead use
1883 * <code>ruby -E</code> to invoke ruby with the correct internal encoding.
1884 *
1885 * == IO encoding example
1886 *
1887 * In the following example a UTF-8 encoded string "R\u00E9sum\u00E9" is transcoded for
1888 * output to ISO-8859-1 encoding, then read back in and transcoded to UTF-8:
1889 *
1890 * string = "R\u00E9sum\u00E9"
1891 *
1892 * open("transcoded.txt", "w:ISO-8859-1") do |io|
1893 * io.write(string)
1894 * end
1895 *
1896 * puts "raw text:"
1897 * p File.binread("transcoded.txt")
1898 * puts
1899 *
1900 * open("transcoded.txt", "r:ISO-8859-1:UTF-8") do |io|
1901 * puts "transcoded text:"
1902 * p io.read
1903 * end
1904 *
1905 * While writing the file, the internal encoding is not specified as it is
1906 * only necessary for reading. While reading the file both the internal and
1907 * external encoding must be specified to obtain the correct result.
1908 *
1909 * $ ruby t.rb
1910 * raw text:
1911 * "R\xE9sum\xE9"
1912 *
1913 * transcoded text:
1914 * "R\u00E9sum\u00E9"
1915 *
1916 */
1917
1918void
1920{
1921#undef rb_intern
1922#define rb_intern(str) rb_intern_const(str)
1923 VALUE list;
1924 int i;
1925
1927 rb_define_alloc_func(rb_cEncoding, enc_s_alloc);
1929 rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
1930 rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
1931 rb_define_method(rb_cEncoding, "name", enc_name, 0);
1932 rb_define_method(rb_cEncoding, "names", enc_names, 0);
1933 rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
1934 rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
1935 rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
1936 rb_define_singleton_method(rb_cEncoding, "list", enc_list, 0);
1937 rb_define_singleton_method(rb_cEncoding, "name_list", rb_enc_name_list, 0);
1938 rb_define_singleton_method(rb_cEncoding, "aliases", rb_enc_aliases, 0);
1939 rb_define_singleton_method(rb_cEncoding, "find", enc_find, 1);
1940 rb_define_singleton_method(rb_cEncoding, "compatible?", enc_compatible_p, 2);
1941
1942 rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
1943 rb_define_singleton_method(rb_cEncoding, "_load", enc_load, 1);
1944
1945 rb_define_singleton_method(rb_cEncoding, "default_external", get_default_external, 0);
1946 rb_define_singleton_method(rb_cEncoding, "default_external=", set_default_external, 1);
1947 rb_define_singleton_method(rb_cEncoding, "default_internal", get_default_internal, 0);
1948 rb_define_singleton_method(rb_cEncoding, "default_internal=", set_default_internal, 1);
1949 rb_define_singleton_method(rb_cEncoding, "locale_charmap", rb_locale_charmap, 0); /* in localeinit.c */
1950
1951 list = rb_ary_new2(enc_table.count);
1953 rb_encoding_list = list;
1955
1956 for (i = 0; i < enc_table.count; ++i) {
1957 rb_ary_push(list, enc_new(enc_table.list[i].enc));
1958 }
1959
1961}
1962
/*
 * Initialization entry point that only forwards to rb_enc_init().
 */
void
Init_encodings(void)
{
    rb_enc_init();
}
1968
1969/* locale insensitive ctype functions */
1970
1971void
1973{
1974 st_foreach(enc_table.names, func, arg);
1975}
#define ENCINDEX_BUILTIN_MAX
Definition: encindex.h:54
#define ENCINDEX_UTF_8
Definition: encindex.h:43
#define ENCINDEX_US_ASCII
Definition: encindex.h:44
#define ENCINDEX_ASCII
Definition: encindex.h:42
st_table * names
Definition: encoding.c:59
struct rb_encoding_entry * list
Definition: encoding.c:56
rb_encoding * rb_find_encoding(VALUE enc)
Definition: encoding.c:252
#define is_data_encoding(obj)
Definition: encoding.c:85
int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.c:131
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1032
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:779
int rb_filesystem_encindex(void)
Definition: encoding.c:1378
int rb_enc_replicate(const char *name, rb_encoding *encoding)
Definition: encoding.c:402
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:197
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:866
#define ENC_TO_ENCINDEX(enc)
Definition: encoding.c:65
void rb_encdb_declare(const char *name)
Definition: encoding.c:350
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1328
rb_encoding * rb_enc_check_str(VALUE str1, VALUE str2)
Definition: encoding.c:880
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1316
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:1068
#define ENC_INDEX_MASK
Definition: encoding.c:63
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:609
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1387
#define UNSPECIFIED_ENCODING
Definition: encoding.c:70
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1512
int rb_enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:326
int rb_utf8_encindex(void)
Definition: encoding.c:1334
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1014
void Init_Encoding(void)
Definition: encoding.c:1919
int size
Definition: encoding.c:58
int rb_enc_set_dummy(int index)
Definition: encoding.c:393
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:872
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1322
int rb_enc_code_to_mbclen(int code, rb_encoding *enc)
Definition: encoding.c:1100
rb_encoding * rb_enc_get_from_index(int index)
Definition: encoding.c:618
void rb_enc_set_base(const char *name, const char *orig)
Definition: encoding.c:382
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:521
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:990
#define ENCDB_REGISTER(name, enc)
int rb_enc_to_index(rb_encoding *enc)
Definition: encoding.c:125
void Init_encodings(void)
Definition: encoding.c:1964
int rb_data_is_encoding(VALUE obj)
Definition: encoding.c:89
void rb_enc_set_default_internal(VALUE encoding)
Definition: encoding.c:1563
void rb_encdb_set_unicode(int index)
Definition: encoding.c:576
VALUE rb_enc_default_external(void)
Definition: encoding.c:1441
#define ENCODING_NAMELEN_MAX
Definition: encoding.c:72
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:830
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:728
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:462
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:1112
int rb_encdb_replicate(const char *name, const char *orig)
Definition: encoding.c:450
#define MUST_STRING(str)
Definition: encoding.c:24
int rb_enc_find_index2(const char *name, long len)
Definition: encoding.c:717
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1427
ID rb_id_encoding(void)
Definition: encoding.c:759
int rb_locale_encindex(void)
Definition: encoding.c:1354
VALUE rb_cEncoding
Definition: encoding.c:46
NORETURN(static void not_encoding(VALUE enc))
#define rb_intern(str)
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:891
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1020
int rb_enc_capable(VALUE obj)
Definition: encoding.c:753
#define valid_encoding_name_p(name)
Definition: encoding.c:73
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1521
int rb_encdb_dummy(const char *name)
Definition: encoding.c:472
#define ENC_REGISTER(enc)
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:974
unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:1083
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1372
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:1004
#define ENCODING_COUNT
Definition: encoding.c:69
void rb_enc_init(void)
Definition: encoding.c:582
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:245
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:1106
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1340
OnigEncodingType rb_raw_encoding
Definition: encoding.c:30
int count
Definition: encoding.c:57
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:116
void rb_enc_set_default_external(VALUE encoding)
Definition: encoding.c:1479
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:838
int rb_locale_charmap_index(void)
Definition: localeinit.c:109
int rb_enc_find_index(const char *name)
Definition: encoding.c:693
int rb_enc_alias(const char *alias, const char *orig)
Definition: encoding.c:553
int rb_enc_registered(const char *name)
Definition: encoding.c:624
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:1089
#define enc_autoload_p(enc)
Definition: encoding.c:75
int rb_encdb_alias(const char *alias, const char *orig)
Definition: encoding.c:565
#define ENC_SET_DUMMY(enc)
Definition: encoding.c:67
#define is_obj_encoding(obj)
Definition: encoding.c:86
void rb_enc_foreach_name(int(*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg)
Definition: encoding.c:1972
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:1044
int rb_usascii_encindex(void)
Definition: encoding.c:1346
#define ENC_DUMMY_P(enc)
Definition: encoding.c:66
#define ENCODING_SET_INLINED(obj, i)
Definition: encoding.h:59
#define ENC_CODERANGE_7BIT
Definition: encoding.h:104
int rb_enc_str_coderange(VALUE)
Definition: string.c:657
#define rb_enc_name(enc)
Definition: encoding.h:177
#define rb_enc_isascii(c, enc)
Definition: encoding.h:230
VALUE rb_locale_charmap(VALUE klass)
Definition: localeinit.c:91
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:208
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:192
#define rb_enc_asciicompat(enc)
Definition: encoding.h:245
#define ENCODING_INLINE_MAX
Definition: encoding.h:40
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:678
#define rb_enc_mbminlen(enc)
Definition: encoding.h:180
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:191
#define ENC_CODERANGE_ASCIIONLY(obj)
Definition: encoding.h:109
#define ENCODING_GET_INLINED(obj)
Definition: encoding.h:61
#define ENC_CODERANGE_CLEAR(obj)
Definition: encoding.h:111
char str[HTML_ESCAPE_MAX_LEN+1]
Definition: escape.c:18
VALUE rb_define_class(const char *, VALUE)
Defines a top-level class.
Definition: class.c:662
void rb_undef_method(VALUE, const char *)
Definition: class.c:1593
VALUE rb_cObject
Object class.
Definition: ruby.h:2012
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:2671
void rb_bug(const char *fmt,...)
Definition: error.c:636
void rb_set_errinfo(VALUE err)
Sets the current exception ($!) to the given value.
Definition: eval.c:1896
VALUE rb_eTypeError
Definition: error.c:924
VALUE rb_eEncCompatError
Definition: error.c:931
void rb_warn(const char *fmt,...)
Definition: error.c:315
VALUE rb_eArgError
Definition: error.c:925
void rb_loaderror(const char *fmt,...)
Definition: error.c:2690
VALUE rb_errinfo(void)
The current exception in the current thread.
Definition: eval.c:1882
VALUE rb_eEncodingError
Definition: error.c:930
VALUE rb_obj_class(VALUE)
Equivalent to Object#class in Ruby.
Definition: object.c:217
verbose(int level, const char *format,...)
Definition: mjit_worker.c:303
const char * name
Definition: nkf.c:208
const char * alias
Definition: nkf.c:1159
@ UTF_32
Definition: nkf.c:117
@ UTF_16BE
Definition: nkf.c:113
@ UTF_16
Definition: nkf.c:112
@ UTF8_MAC
Definition: nkf.c:111
@ UTF_32BE
Definition: nkf.c:118
@ UTF_8
Definition: nkf.c:108
@ EUC_JP
Definition: nkf.c:99
@ UTF_32LE
Definition: nkf.c:120
@ UTF_16LE
Definition: nkf.c:115
@ ASCII
Definition: nkf.c:87
#define ONIGENC_MBC_ENC_LEN(enc, p, e)
Definition: onigmo.h:361
#define ONIGENC_IS_UNICODE(enc)
Definition: onigmo.h:327
#define UChar
Definition: onigmo.h:76
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
Definition: onigmo.h:352
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e)
Definition: onigmo.h:356
#define ONIGENC_FLAG_UNICODE
Definition: onigmo.h:313
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: onigmo.h:367
#define debug(lvl, x...)
Definition: ffi.c:52
#define ruby_debug
#define STRCASECMP(s1, s2)
#define MEMCPY(p1, p2, type, n)
#define NULL
#define rb_funcallv(recv, mid, argc, argv)
#define RBASIC_CLEAR_CLASS(obj)
#define T_FILE
use StringValue() instead")))
#define RSTRING_LEN(str)
#define ALLOCA_N(type, n)
void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen)
Definition: string.c:2230
unsigned long st_data_t
size_t strlen(const char *)
#define T_STRING
#define ISUPPER(c)
VALUE rb_ivar_get(VALUE, ID)
Definition: variable.c:1070
#define RSTRING_END(str)
const VALUE VALUE obj
#define rb_check_frozen(obj)
#define RSTRING_PTR(str)
void rb_gc_register_mark_object(VALUE)
Definition: gc.c:7079
#define NIL_P(v)
#define REALLOC_N(var, type, n)
const char size_t n
#define T_DATA
#define ruby_verbose
int rb_require_internal(VALUE fname)
Definition: load.c:1110
unsigned long VALUE
VALUE rb_ary_push(VALUE, VALUE)
Definition: array.c:1195
VALUE rb_sym2str(VALUE)
Definition: symbol.c:784
VALUE rb_check_string_type(VALUE)
Definition: string.c:2314
#define xmalloc
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
uint32_t i
#define char
VALUE rb_fstring(VALUE)
Definition: string.c:312
__inline__ const void *__restrict__ size_t len
#define INT2NUM(x)
VALUE rb_ary_replace(VALUE copy, VALUE orig)
Definition: array.c:3811
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2891
#define NUM2INT(x)
void rb_define_singleton_method(VALUE, const char *, VALUE(*)(), int)
#define RUBY_TYPED_FREE_IMMEDIATELY
#define ISALNUM(c)
#define PRIsVALUE
void * memset(void *, int, size_t)
VALUE rb_ary_new(void)
Definition: array.c:723
#define TypedData_Wrap_Struct(klass, data_type, sval)
#define RDATA(obj)
#define CONST_ID(var, str)
#define TRUE
#define FALSE
#define Qtrue
char * strdup(const char *) __attribute__((__malloc__)) __attribute__((__warn_unused_result__))
#define UNLIKELY(x)
struct rb_call_cache buf
#define ISDIGIT(c)
#define Qnil
#define Qfalse
#define DATA_PTR(dta)
void * memcpy(void *__restrict__, const void *__restrict__, size_t)
int Init_enc_set_filesystem_encoding(void)
Definition: localeinit.c:119
#define RB_TYPE_P(obj, type)
#define SPECIAL_CONST_P(x)
#define T_SYMBOL
#define ISLOWER(c)
const VALUE * argv
#define SYMBOL_P(x)
__inline__ int
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1300
#define CLASS_OF(v)
#define TOLOWER(c)
if((__builtin_expect(!!(!me), 0)))
VALUE rb_hash_aset(VALUE, VALUE, VALUE)
Definition: hash.c:2852
#define rb_check_arity
VALUE rb_sprintf(const char *,...) __attribute__((format(printf
unsigned long ID
const char *void rb_warning(const char *,...) __attribute__((format(printf
void rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE(*dumper)(VALUE), VALUE(*loader)(VALUE, VALUE))
Definition: marshal.c:134
#define rb_fstring_cstr(str)
void rb_define_method(VALUE, const char *, VALUE(*)(), int)
#define rb_ary_new2
#define BUILTIN_TYPE(x)
VALUE rb_hash_new(void)
Definition: hash.c:1523
void rb_ary_store(VALUE, long, VALUE)
Definition: array.c:1079
#define ISASCII(c)
VALUE rb_ary_entry(VALUE, long)
Definition: array.c:1512
#define StringValueCStr(v)
#define T_REGEXP
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
Definition: regenc.h:217
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)
Definition: regenc.h:218
#define ONIGENC_IS_ASCII_CODE(code)
Definition: regenc.h:216
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1178
int st_insert2(st_table *tab, st_data_t key, st_data_t value, st_data_t(*func)(st_data_t))
Definition: st.c:1263
int st_insert(st_table *tab, st_data_t key, st_data_t value)
Definition: st.c:1171
int st_lookup(st_table *tab, st_data_t key, st_data_t *value)
Definition: st.c:1101
int st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
Definition: st.c:1717
st_table * st_init_strcasetable(void)
Definition: st.c:683
const char * name
Definition: onigmo.h:162
int ruby_encoding_index
Definition: onigmo.h:178
rb_encoding * enc
Definition: encoding.c:1394
Definition: encoding.c:49
const char * name
Definition: encoding.c:50
rb_encoding * base
Definition: encoding.c:52
rb_encoding * enc
Definition: encoding.c:51
void Init_w32_codepage(void)
Definition: file.c:722