1 /**
`RCString` is a reference-counted string which is based on
$(REF Array, stdx,collections,array) of `ubyte`s.
By default, `RCString` is not a range. The `.by` helpers can be used to specify
the iteration mode.
`RCString` internally stores the string as a UTF-8 $(REF Array, stdx,collections,array).
7 
8 $(UL
9     $(LI `str.by!char` - iterates over individual `char` characters. No auto-decoding is done.)
10     $(LI `str.by!wchar` - iterates over `wchar` characters. Auto-decoding is done.)
11     $(LI `str.by!dchar` - iterates over `dchar` characters. Auto-decoding is done.)
    $(LI `str.by!ubyte` - iterates over the raw `ubyte` representation. No auto-decoding is done. This is similar to $(REF representation, std,string) for built-in strings.)
13 )
14 */
15 module stdx.collections.rcstring;
16 
17 import stdx.collections.common;
18 import stdx.collections.array;
19 import std.range.primitives : isInputRange, ElementType, hasLength;
20 import std.traits : isSomeChar, isSomeString;
21 
22 debug(CollectionRCString) import std.stdio;
23 
24 version(unittest)
25 {
26     import std.experimental.allocator.mallocator;
27     import std.experimental.allocator.building_blocks.stats_collector;
28     import std.experimental.allocator : RCIAllocator, RCISharedAllocator,
29            allocatorObject, sharedAllocatorObject;
30     import std.algorithm.mutation : move;
31     import std.stdio;
32 
33     private alias SCAlloc = StatsCollector!(Mallocator, Options.bytesUsed);
34 }
35 
/// A reference-counted string whose contents are held in an `Array!ubyte`.
37 struct RCString
38 {
39 private:
40     Array!ubyte _support;
41     mixin(allocatorHandler);
42 public:
43 
44     /**
45      Constructs a qualified rcstring that will use the provided
46      allocator object. For `immutable` objects, a `RCISharedAllocator` must
47      be supplied.
48 
49      Params:
50           allocator = a $(REF RCIAllocator, std,experimental,allocator) or
51                       $(REF RCISharedAllocator, std,experimental,allocator)
52                       allocator object
53 
54      Complexity: $(BIGOH 1)
55     */
56     this(A, this Q)(A allocator)
57     if (!is(Q == shared)
58         && (is(A == RCISharedAllocator) || !is(Q == immutable))
59         && (is(A == RCIAllocator) || is(A == RCISharedAllocator)))
60     {
61         debug(CollectionRCString)
62         {
63             writefln("RCString.ctor: begin");
64             scope(exit) writefln("RCString.ctor: end");
65         }
66         static if (is(Q == immutable) || is(Q == const))
67             _allocator = immutable AllocatorHandler(allocator);
68         else
69             setAllocator(allocator);
70     }
71 
72     ///
73     @safe unittest
74     {
75         import std.experimental.allocator : theAllocator, processAllocator;
76 
77         auto a = RCString(theAllocator);
78         auto ca = const RCString(processAllocator);
79         auto ia = immutable RCString(processAllocator);
80     }
81 
82     /**
83     Constructs a qualified rcstring out of a number of bytes
84     that will use the provided allocator object.
85     For `immutable` objects, a `RCISharedAllocator` must be supplied.
86     If no allocator is passed, the default allocator will be used.
87 
88     Params:
89          allocator = a $(REF RCIAllocator, std,experimental,allocator) or
90                      $(REF RCISharedAllocator, std,experimental,allocator)
91                      allocator object
92          bytes = a variable number of bytes, either in the form of a
93                   list or as a built-in RCString
94 
95     Complexity: $(BIGOH m), where `m` is the number of bytes.
96     */
97     this()(ubyte[] bytes...)
98     {
99         this(defaultAllocator!(typeof(this)), bytes);
100     }
101 
102     ///
103     @safe unittest
104     {
105         // Create a list from a list of bytes
106         auto a = RCString('1', '2', '3');
107 
108         // Create a list from an array of bytes
109         auto b = RCString(['1', '2', '3']);
110 
111         // Create a const list from a list of bytes
112         auto c = const RCString('1', '2', '3');
113     }
114 
115     /// ditto
116     this(A, this Q)(A allocator, ubyte[] bytes...)
117     if (!is(Q == shared)
118         && (is(A == RCISharedAllocator) || !is(Q == immutable))
119         && (is(A == RCIAllocator) || is(A == RCISharedAllocator)))
120     {
121         this(allocator);
122         _support = typeof(_support)(allocator, bytes);
123     }
124 
125     ///
126     @safe unittest
127     {
128         import std.experimental.allocator : theAllocator, processAllocator;
129 
130         // Create a list from a list of ints
131         auto a = RCString(theAllocator, '1', '2', '3');
132 
133         // Create a list from an array of ints
134         auto b = RCString(theAllocator, ['1', '2', '3']);
135     }
136 
137     /**
138     Constructs a qualified rcstring out of a string
139     that will use the provided allocator object.
140     For `immutable` objects, a `RCISharedAllocator` must be supplied.
141     If no allocator is passed, the default allocator will be used.
142 
143     Params:
144          allocator = a $(REF RCIAllocator, std,experimental,allocator) or
145                      $(REF RCISharedAllocator, std,experimental,allocator)
146                      allocator object
147          s = input string
148 
149     Complexity: $(BIGOH m), where `m` is the number of bytes of the input string.
150     */
151     this()(string s)
152     {
153         import std.string : representation;
154         this(defaultAllocator!(typeof(this)), s.dup.representation);
155     }
156 
157     ///
158     @safe unittest
159     {
160         import std.algorithm.comparison : equal;
161         auto s = RCString("dlang");
162         assert(s.by!char.equal("dlang"));
163     }
164 
165     /// ditto
166     this(this Q)(dstring s)
167     {
168         import std.utf : byChar;
169         this(s.byChar);
170     }
171 
172     ///
173     @safe unittest
174     {
175         import std.algorithm.comparison : equal;
176         auto s = RCString("dlang"d);
177         assert(s.by!char.equal("dlang"));
178     }
179 
180     /// ditto
181     this(this Q)(wstring s)
182     {
183         import std.utf : byChar;
184         this(s.byChar);
185     }
186 
187     ///
188     @safe unittest
189     {
190         import std.algorithm.comparison : equal;
191         auto s = RCString("dlang"w);
192         assert(s.by!char.equal("dlang"));
193     }
194 
195     /**
196     Constructs a qualified rcstring out of an input range
197     that will use the provided allocator object.
198     For `immutable` objects, a `RCISharedAllocator` must be supplied.
199     If no allocator is passed, the default allocator will be used.
200 
201     Params:
202          allocator = a $(REF RCIAllocator, std,experimental,allocator) or
203                      $(REF RCISharedAllocator, std,experimental,allocator)
204                      allocator object
205          r = input range
206 
207     Complexity: $(BIGOH n), where `n` is the number of elemtns of the input range.
208     */
209     this(this Q, A, R)(A allocator, R r)
210     if (!is(Q == shared)
211         && (is(A == RCISharedAllocator) || !is(Q == immutable))
212         && (is(A == RCIAllocator) || is(A == RCISharedAllocator))
213         && isInputRange!R && isSomeChar!(ElementType!R) && !isSomeString!R)
214     {
215         import std.utf : byChar;
216         this(allocator);
217         static if (hasLength!R)
218             _support.reserve(r.length);
219         foreach (e; r.byChar)
220             _support ~= cast(ubyte) e;
221     }
222 
223     /// ditto
224     this(this Q, R)(R r)
225     if (isInputRange!R && isSomeChar!(ElementType!R) && !isSomeString!R)
226     {
227         this(defaultAllocator!(typeof(this)), r);
228     }
229 
230     ///
231     @safe unittest
232     {
233         import std.range : take;
234         import std.utf : byCodeUnit;
235         auto s = RCString("dlang".byCodeUnit.take(10));
236         assert(s.equal("dlang"));
237     }
238 
239     ///
240     @nogc nothrow pure @safe
241     bool empty() const
242     {
243         return _support.empty;
244     }
245 
246     ///
247     @safe unittest
248     {
249         assert(!RCString("dlang").empty);
250         assert(RCString("").empty);
251     }
252 
253     ///
254     @trusted
255     auto by(T)()
256     if (is(T == char) || is(T == wchar) || is(T == dchar))
257     {
258         Array!char tmp = *cast(Array!char*)(&_support);
259         static if (is(T == char))
260         {
261             return tmp;
262         }
263         else
264         {
265             import std.utf : byUTF;
266             return tmp.byUTF!T();
267         }
268     }
269 
270     ///
271     @safe unittest
272     {
273         import std.algorithm.comparison : equal;
274         import std.utf : byChar, byWchar;
275         auto hello = RCString("你好");
276         assert(hello.by!char.equal("你好".byChar));
277         assert(hello.by!wchar.equal("你好".byWchar));
278         assert(hello.by!dchar.equal("你好"));
279     }
280 
281     ///
282     typeof(this) opBinary(string op)(typeof(this) rhs)
283     if (op == "~")
284     {
285         RCString s = this;
286         s._support ~= rhs._support;
287         return s;
288     }
289 
290     /// ditto
291     typeof(this) opBinary(string op)(string rhs)
292     if (op == "~")
293     {
294         auto rcs = RCString(rhs);
295         RCString s = this;
296         s._support ~= rcs._support;
297         return s;
298     }
299 
300     /// ditto
301     typeof(this) opBinaryRight(string op)(string rhs)
302     if (op == "~")
303     {
304         auto s = RCString(rhs);
305         RCString rcs = this;
306         s._support ~= rcs._support;
307         return s;
308     }
309 
310     /// ditto
311     typeof(this) opBinary(string op, C)(C c)
312     if (op == "~" && isSomeChar!C)
313     {
314         RCString s = this;
315         s._support ~= cast(ubyte) c;
316         return s;
317     }
318 
319     /// ditto
320     typeof(this) opBinaryRight(string op, C)(C c)
321     if (op == "~" && isSomeChar!C)
322     {
323         RCString rcs = this;
324         rcs._support.insert(0, cast(ubyte) c);
325         return rcs;
326     }
327 
328     /// ditto
329     typeof(this) opBinary(string op, R)(R r)
330     if (op == "~" && isInputRange!R && isSomeChar!(ElementType!R) && !isSomeString!R)
331     {
332         RCString s = this;
333         static if (hasLength!R)
334             s._support.reserve(s._support.length + r.length);
335         foreach (el; r)
336         {
337             s._support ~= cast(ubyte) el;
338         }
339         return s;
340     }
341 
342     /// ditto
343     typeof(this) opBinaryRight(string op, R)(R lhs)
344     if (op == "~" && isInputRange!R && isSomeChar!(ElementType!R) && !isSomeString!R)
345     {
346         auto l = RCString(lhs);
347         RCString rcs = this;
348         l._support ~= rcs._support;
349         return l;
350     }
351 
352     ///
353     @safe unittest
354     {
355         auto r1 = RCString("abc");
356         auto r2 = RCString("def");
357         assert((r1 ~ r2).equal("abcdef"));
358         assert((r1 ~ "foo").equal("abcfoo"));
359         assert(("abc" ~ r2).equal("abcdef"));
360         assert((r1 ~ 'd').equal("abcd"));
361         assert(('a' ~ r2).equal("adef"));
362     }
363 
364     ///
365     @safe unittest
366     {
367         import std.range : take;
368         import std.utf : byCodeUnit;
369         auto r1 = RCString("abc");
370         auto r2 = "def".byCodeUnit.take(3);
371         assert((r1 ~ r2).equal("abcdef"));
372         assert((r2 ~ r1).equal("defabc"));
373     }
374 
375     ///
376     auto opBinary(string op)(typeof(this) rhs)
377     if (op == "in")
378     {
379         // TODO
380         import std.algorithm.searching : find;
381         return this.by!char.find(rhs.by!char);
382     }
383 
384     auto opBinaryRight(string op)(string rhs)
385     if (op == "in")
386     {
387         // TODO
388         import std.algorithm.searching : find;
389         return rhs.find(this.by!char);
390     }
391 
392     ///
393     @safe unittest
394     {
395         auto r1 = RCString("abc");
396         auto r2 = RCString("def");
397         auto rtext = RCString("abcdefgh");
398         //import std.stdio;
399         //(r1 in rtext).writeln;
400         //(r1 in rtext).writeln;
401     }
402 
403     ///
404     typeof(this) opOpAssign(string op)(typeof(this) rhs)
405     if (op == "~")
406     {
407         _support ~= rhs._support;
408         return this;
409     }
410 
411     ///
412     @safe unittest
413     {
414         auto r1 = RCString("abc");
415         r1 ~= RCString("def");
416         assert(r1.equal("abcdef"));
417     }
418 
419     /// ditto
420     typeof(this) opOpAssign(string op)(string rhs)
421     if (op == "~")
422     {
423         import std.string : representation;
424         _support ~= rhs.representation;
425         return this;
426     }
427 
428     ///
429     @safe unittest
430     {
431         auto r1 = RCString("abc");
432         r1 ~= "def";
433         assert(r1.equal("abcdef"));
434     }
435 
436     typeof(this) opOpAssign(string op, C)(C c)
437     if (op == "~" && isSomeChar!C)
438     {
439         _support ~= cast(ubyte) c;
440         return this;
441     }
442 
443     ///
444     @safe unittest
445     {
446         auto r1 = RCString("abc");
447         r1 ~= 'd';
448         assert(r1.equal("abcd"));
449     }
450 
451     typeof(this) opOpAssign(string op, R)(R r)
452     if (op == "~" && isSomeChar!(ElementType!R) && isInputRange!R && !isSomeString!R)
453     {
454         _support ~= RCString(r)._support;
455         return this;
456     }
457 
458     ///
459     @safe unittest
460     {
461         import std.range : take;
462         import std.utf : byCodeUnit;
463         auto r1 = RCString("abc");
464         r1 ~= "foo".byCodeUnit.take(4);
465         assert(r1.equal("abcfoo"));
466     }
467 
468     ///
469     bool opEquals()(auto ref typeof(this) rhs) const
470     {
471         return _support == rhs._support;
472     }
473 
474     ///
475     @safe unittest
476     {
477         assert(RCString("abc") == RCString("abc"));
478         assert(RCString("abc") != RCString("Abc"));
479         assert(RCString("abc") != RCString("abd"));
480         assert(RCString("abc") != RCString(""));
481         assert(RCString("") == RCString(""));
482     }
483 
484     /// ditto
485     bool opEquals()(string rhs) const
486     {
487         import std.string : representation;
488         import std.algorithm.comparison : equal;
489         return _support._payload.equal(rhs.representation);
490     }
491 
492     ///
493     @safe unittest
494     {
495         assert(RCString("abc") == "abc");
496         assert(RCString("abc") != "Abc");
497         assert(RCString("abc") != "abd");
498         assert(RCString("abc") != "");
499         assert(RCString("") == "");
500     }
501 
502     bool opEquals(R)(R r)
503     if (isSomeChar!(ElementType!R) && isInputRange!R && !isSomeString!R)
504     {
505         import std.algorithm.comparison : equal;
506         return _support.equal(r);
507     }
508 
509     ///
510     @safe unittest
511     {
512         import std.range : take;
513         import std.utf : byCodeUnit;
514         assert(RCString("abc") == "abc".byCodeUnit.take(3));
515         assert(RCString("abc") != "Abc".byCodeUnit.take(3));
516         assert(RCString("abc") != "abd".byCodeUnit.take(3));
517         assert(RCString("abc") != "".byCodeUnit.take(3));
518         assert(RCString("") == "".byCodeUnit.take(3));
519     }
520 
521     ///
522     int opCmp()(auto ref typeof(this) rhs)
523     {
524         return _support.opCmp(rhs._support);
525     }
526 
527     ///
528     @safe unittest
529     {
530         assert(RCString("abc") <= RCString("abc"));
531         assert(RCString("abc") >= RCString("abc"));
532         assert(RCString("abc") > RCString("Abc"));
533         assert(RCString("Abc") < RCString("abc"));
534         assert(RCString("abc") < RCString("abd"));
535         assert(RCString("abc") > RCString(""));
536         assert(RCString("") <= RCString(""));
537         assert(RCString("") >= RCString(""));
538     }
539 
540     int opCmp()(string rhs)
541     {
542         import std.string : representation;
543         return _support.opCmp(rhs.representation);
544     }
545 
546     ///
547     @safe unittest
548     {
549         assert(RCString("abc") <= "abc");
550         assert(RCString("abc") >= "abc");
551         assert(RCString("abc") > "Abc");
552         assert(RCString("Abc") < "abc");
553         assert(RCString("abc") < "abd");
554         assert(RCString("abc") > "");
555         assert(RCString("") <= "");
556         assert(RCString("") >= "");
557     }
558 
559     int opCmp(R)(R rhs)
560     if (isSomeChar!(ElementType!R) && isInputRange!R && !isSomeString!R)
561     {
562         import std.string : representation;
563         return _support.opCmp(rhs);
564     }
565 
566     ///
567     @safe unittest
568     {
569         import std.range : take;
570         import std.utf : byCodeUnit;
571         assert(RCString("abc") <= "abc".byCodeUnit.take(3));
572         assert(RCString("abc") >= "abc".byCodeUnit.take(3));
573         assert(RCString("abc") > "Abc".byCodeUnit.take(3));
574         assert(RCString("Abc") < "abc".byCodeUnit.take(3));
575         assert(RCString("abc") < "abd".byCodeUnit.take(3));
576         assert(RCString("abc") > "".byCodeUnit.take(3));
577         assert(RCString("") <= "".byCodeUnit.take(3));
578         assert(RCString("") >= "".byCodeUnit.take(3));
579     }
580 
581     auto opSlice(size_t start, size_t end)
582     {
583         RCString s = save;
584         s._support = s._support[start .. end];
585         return s;
586     }
587 
588     ///
589     @safe unittest
590     {
591         auto a = RCString("abcdef");
592         assert(a[2 .. $].equal("cdef"));
593         assert(a[0 .. 2].equal("ab"));
594         assert(a[3 .. $ - 1].equal("de"));
595     }
596 
597     ///
598     auto opDollar()
599     {
600         return _support.length;
601     }
602 
603     ///
604     auto save()
605     {
606         RCString s = this;
607         return s;
608     }
609 
610     ///
611     auto opSlice()
612     {
613         return this.save;
614     }
615 
616     // Phobos
617     auto equal(T)(T rhs)
618     {
619         import std.algorithm.comparison : equal;
620         return by!char.equal(rhs);
621     }
622 
623     auto writeln(T...)(T rhs)
624     {
625         import std.stdio : writeln;
626         return by!char.writeln(rhs);
627     }
628 
629     string toString()
630     {
631         import std.array : array;
632         import std.exception : assumeUnique;
633         return by!char.array.assumeUnique;
634     }
635 
636     ///
637     auto opSliceAssign(char c, size_t start, size_t end)
638     {
639         _support[start .. end] = cast(ubyte) c;
640     }
641 
642     ///
643     @safe unittest
644     {
645         auto r1 = RCString("abcdef");
646         r1[2..4] = '0';
647         assert(r1.equal("ab00ef"));
648     }
649 
650     ///
651     bool opCast(T : bool)()
652     {
653         return !empty;
654     }
655 
656     ///
657     @safe unittest
658     {
659         assert(RCString("foo"));
660         assert(!RCString(""));
661     }
662 
663     /// ditto
664     auto ref opAssign()(RCString rhs)
665     {
666         _support = rhs._support;
667         return this;
668     }
669 
670     /// ditto
671     auto ref opAssign(R)(R rhs)
672     {
673         _support = RCString(rhs)._support;
674         return this;
675     }
676 
677     ///
678     @safe unittest
679     {
680         auto rc = RCString("foo");
681         assert(rc.equal("foo"));
682         rc = RCString("bar1");
683         assert(rc.equal("bar1"));
684         rc = "bar2";
685         assert(rc.equal("bar2"));
686 
687         import std.range : take;
688         import std.utf : byCodeUnit;
689         rc = "bar3".take(10).byCodeUnit;
690         assert(rc.equal("bar3"));
691     }
692 
693     auto dup()()
694     {
695         return RCString(by!char);
696     }
697 
698     ///
699     @safe unittest
700     {
701         auto s = RCString("foo");
702         s = RCString("bar");
703         assert(s.equal("bar"));
704         auto s2 = s.dup;
705         s2 = RCString("fefe");
706         assert(s.equal("bar"));
707         assert(s2.equal("fefe"));
708     }
709 
    /*
    Presumably intended to return an immutable duplicate of this string.

    NOTE(review): `RCString` is declared in this module as a plain struct,
    not a template, so `RCString!(immutable(char))` looks like it cannot be
    instantiated. Because `idup` is itself a template, the problem would only
    surface when it is first called - confirm and fix.
    */
    auto idup()()
    {
        return RCString!(immutable(char))(by!char);
    }

    ///
    @safe unittest
    {
        // NOTE(review): this example calls `dup`, not `idup`, so `idup` is
        // never instantiated here.
        auto s = RCString("foo");
        s = RCString("bar");
        assert(s.equal("bar"));
        auto s2 = s.dup;
        s2 = RCString("fefe");
        assert(s.equal("bar"));
        assert(s2.equal("fefe"));
    }
726 
727     ///
728     auto opIndex(size_t pos)
729     in
730     {
731         assert(pos < _support.length, "Invalid position.");
732     }
733     body
734     {
735         return _support[pos];
736     }
737 
738     ///
739     @safe unittest
740     {
741         auto s = RCString("bar");
742         assert(s[0] == 'b');
743         assert(s[1] == 'a');
744         assert(s[2] == 'r');
745     }
746 
747     ///
748     auto opIndexAssign(char el, size_t pos)
749     in
750     {
751         assert(pos < _support.length, "Invalid position.");
752     }
753     body
754     {
755         return _support[pos] = cast(ubyte) el;
756     }
757 
758     ///
759     @safe unittest
760     {
761         auto s = RCString("bar");
762         assert(s[0] == 'b');
763         s[0] = 'f';
764         assert(s.equal("far"));
765     }
766 
767     ///
768     auto opIndexAssign(char c)
769     {
770         _support[] = cast(ubyte) c;
771     }
772 
773     ///
774     auto toHash()
775     {
776         // will be safe with 2.082
777         return () @trusted { return _support.hashOf; }();
778     }
779 
780     ///
781     @safe unittest
782     {
783         auto rc = RCString("abc");
784         assert(rc.toHash == RCString("abc").toHash);
785         rc ~= 'd';
786         assert(rc.toHash == RCString("abcd").toHash);
787         assert(RCString().toHash == RCString().toHash);
788     }
789 }
790 
// Writing through the front of the `char` view is visible in later views.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto bytes = cast(ubyte[])("aaa".dup);
    auto str = RCString(bytes);

    assert(equal(str.by!char, "aaa"));
    str.by!char.front = 'b';
    assert(equal(str.by!char, "baa"));
}
802 
// `by!char` exposes raw UTF-8 code units, `by!wchar` transcodes them.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto bytes = cast(ubyte[])("hell\u00F6".dup);
    auto str = RCString(bytes);

    // The ö occupies two UTF-8 code units
    assert(str.by!char().equal(['h', 'e', 'l', 'l', 0xC3, 0xB6]));

    // `wchar`s are able to hold the ö in a single element (UTF-16 code unit)
    assert(str.by!wchar().equal(['h', 'e', 'l', 'l', 'ö']));
}
815 
// Overwriting the last two code units through the `char` view with the
// UTF-8 encoding of ö changes what `by!wchar` decodes.
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto bytes = cast(ubyte[])("hello".dup);
    auto str = RCString(bytes);
    auto chars = str.by!char;

    chars[$ - 2] = cast(ubyte) 0xC3;
    chars[$ - 1] = cast(ubyte) 0xB6;

    assert(str.by!wchar().equal(['h', 'e', 'l', 'ö']));
}