I have had a Stack Overflow answer to the question “Efficient way to remove ALL whitespace from String?” for a while now. After .NET 7 was released with many performance optimizations, I wanted to revisit that answer and test all of the “remove whitespace” methods against multiple frameworks. To make the test a little more robust, I used the BenchmarkDotNet library.

The runtimes I added to this test were:

  • .NET Core 3.1
  • .NET 6
  • .NET 7

Benchmark code

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;

namespace RemoveWhitespaceBenchmark
{
    [SimpleJob(RuntimeMoniker.NetCoreApp31)]
    [SimpleJob(RuntimeMoniker.Net60)]
    [SimpleJob(RuntimeMoniker.Net70)]
    public class RemoveWhitespace
    {
        [Benchmark]
        [ArgumentsSource(nameof(Data))]
        public string RemoveStringReader(string text, string name)
        {
            // Copies every non-whitespace character of `text` into a StringBuilder,
            // pulling the characters one at a time out of a StringReader.
            // `name` is not read here; it only travels with the argument set from Data.
            var builder = new StringBuilder(text.Length);
            using (var reader = new StringReader(text))
            {
                // Exactly text.Length reads are issued, one per character of the input.
                int remaining = text.Length;
                while (remaining-- > 0)
                {
                    char current = (char)reader.Read();
                    if (char.IsWhiteSpace(current))
                    {
                        continue;
                    }

                    builder.Append(current);
                }
            }

            return builder.ToString();
        }

        [Benchmark]
        [ArgumentsSource(nameof(Data))]
        public string RemoveLinqNativeCharIsWhitespace(string text, string name)
        {
            // LINQ variant that spells the whitespace check with the C# `char` keyword:
            // copy the input to an array, filter it, and build a string from what is left.
            // `name` is not read here; it only travels with the argument set from Data.
            char[] characters = text.ToCharArray();
            char[] kept = characters
                .Where(candidate => !char.IsWhiteSpace(candidate))
                .ToArray();

            return new string(kept);
        }

        [Benchmark]
        [ArgumentsSource(nameof(Data))]
        public string RemoveLinq(string text, string name)
        {
            // LINQ variant that spells the whitespace check with the `Char` type name:
            // copy the input to an array, filter it, and build a string from what is left.
            // `name` is not read here; it only travels with the argument set from Data.
            char[] characters = text.ToCharArray();
            char[] kept = characters
                .Where(candidate => !Char.IsWhiteSpace(candidate))
                .ToArray();

            return new string(kept);
        }

        [Benchmark]
        [ArgumentsSource(nameof(Data))]
        public string RemoveRegex(string text, string name)
        {
            // Uses the static Regex.Replace overload, passing the pattern on every call,
            // to replace each run of whitespace with the empty string.
            // `name` is not read here; it only travels with the argument set from Data.
            const string whitespaceRun = @"\s+";
            string stripped = Regex.Replace(text, whitespaceRun, "");
            return stripped;
        }

        // Shared Regex for the \s+ pattern, constructed once with RegexOptions.Compiled
        // and reused by RemoveRegexCompiled on every invocation.
        private static readonly Regex compiled = new Regex(@"\s+", RegexOptions.Compiled);

        [Benchmark]
        [ArgumentsSource(nameof(Data))]
        public string RemoveRegexCompiled(string text, string name)
        {
            // Same replacement as the plain regex variant, but through the pre-built
            // static `compiled` instance instead of the static Regex.Replace helper.
            // `name` is not read here; it only travels with the argument set from Data.
            string stripped = compiled.Replace(text, "");
            return stripped;
        }

        [Benchmark]
        [ArgumentsSource(nameof(Data))]
        public string RemoveForLoop(string text, string name)
        {
            // Deletes whitespace one character at a time with string.Remove.
            // The scan runs from the last index down to 0 so that removing a character
            // never shifts the positions that are still waiting to be inspected.
            // `name` is not read here; it only travels with the argument set from Data.
            string result = text;
            int index = result.Length;
            while (--index >= 0)
            {
                if (!char.IsWhiteSpace(result[index]))
                {
                    continue;
                }

                // Strings are immutable, so every removal yields a new string.
                result = result.Remove(index, 1);
            }

            return result;
        }

        [Benchmark]
        [ArgumentsSource(nameof(Data))]
        public string StringSplitThenJoin(string text, string name)
        {
            // Splits on whitespace and glues the surviving pieces back together.
            // A null separator array makes Split treat white-space characters as the
            // delimiters; RemoveEmptyEntries drops the empty pieces between adjacent ones.
            // `name` is not read here; it only travels with the argument set from Data.
            string[] whitespaceSeparators = null;
            string[] pieces = text.Split(whitespaceSeparators, StringSplitOptions.RemoveEmptyEntries);
            return string.Join("", pieces);
        }

        [Benchmark]
        [ArgumentsSource(nameof(Data))]
        public string RemoveInPlaceCharArray(string text, string name)
        {
            // Compacts a char[] copy of the input in place: characters that are kept are
            // written back at `write`, which never runs ahead of `read`, and the final
            // string is built from the first `write` slots of the buffer.
            // Whitespace is recognised through the explicit code-point list below rather
            // than through char.IsWhiteSpace.
            // `name` is not read here; it only travels with the argument set from Data.
            char[] buffer = text.ToCharArray();
            int write = 0;
            for (int read = 0; read < buffer.Length; read++)
            {
                char current = buffer[read];
                switch (current)
                {
                    // Control characters: tab, line feed, vertical tab, form feed,
                    // carriage return, next line.
                    case '\u0009':
                    case '\u000A':
                    case '\u000B':
                    case '\u000C':
                    case '\u000D':
                    case '\u0085':
                    // Space and no-break space.
                    case '\u0020':
                    case '\u00A0':
                    // Ogham space mark.
                    case '\u1680':
                    // U+2000 through U+200A: en quad ... hair space.
                    case '\u2000':
                    case '\u2001':
                    case '\u2002':
                    case '\u2003':
                    case '\u2004':
                    case '\u2005':
                    case '\u2006':
                    case '\u2007':
                    case '\u2008':
                    case '\u2009':
                    case '\u200A':
                    // Line separator and paragraph separator.
                    case '\u2028':
                    case '\u2029':
                    // Narrow no-break space, medium mathematical space, ideographic space.
                    case '\u202F':
                    case '\u205F':
                    case '\u3000':
                        // Skipped: `write` stays where it is.
                        break;
                    default:
                        buffer[write] = current;
                        write++;
                        break;
                }
            }

            return new string(buffer, 0, write);
        }

        // Short input: digits and letters separated by spaces, a tab and a newline.
        private const string SHORT_TEXT = "123 123 \t 1adc \n 222";

        // SHORT_TEXT with every whitespace character removed. Not referenced by the
        // benchmark methods in this class — presumably kept for manual verification.
        private const string EXPECTED_SHORT_TEXT = "1231231adc222";

        // Long input
        private const string LONG_TEXT = "123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222";

        private const string EXPECTED_LONG_TEXT = "1231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc222";

        public IEnumerable<object[]> Data()
        {
            // Argument sets for the benchmarks that name this method in [ArgumentsSource]:
            // each entry is { text, name }, the short input first and the long input second.
            object[] shortCase = { SHORT_TEXT, "SHORT" };
            object[] longCase = { LONG_TEXT, "LONG" };

            return new[] { shortCase, longCase };
        }
    }
}

Important: The .csproj file has to look something like this:

<PropertyGroup>
  <!-- One target framework per runtime that the benchmark jobs run on. -->
  <TargetFrameworks>netcoreapp3.1;net6.0;net7.0</TargetFrameworks>
  <!-- Implicit usings are switched off, so the benchmark file lists its using directives explicitly. -->
  <ImplicitUsings>disable</ImplicitUsings>
  <!-- NOTE(review): presumably stops an auto-generated entry point from clashing with the hand-written Main; confirm. -->
  <GenerateProgramFile>false</GenerateProgramFile>
  [...]
</PropertyGroup>

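Implicit usings are disabled because the older target framework defaults to an earlier C# version that does not support the feature, so every using directive has to be written out explicitly.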

Running the benchmark test

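To run the benchmark, you only have to add this to your Program.cs file (BenchmarkRunner lives in the BenchmarkDotNet.Running namespace, so add a using directive for it):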

// Entry point: hands the RemoveWhitespace benchmark class to BenchmarkRunner.
// `args` is accepted but not used.
public static void Main(string[] args) =>
    BenchmarkRunner.Run<RemoveWhitespace>();

Results

Short Text

| Method | Job | Runtime | name | Mean | Error | StdDev | Median |
| --- | --- | --- | --- | --- | --- | --- | --- |
| RemoveForLoop | .NET Core 3.1 | .NET Core 3.1 | SHORT | 246.68 | 4.885 | 5.999 | 244.6 |
| RemoveForLoop | .NET 7.0 | .NET 7.0 | SHORT | 314.28 | 6.207 | 8.902 | 313.59 |
| RemoveForLoop | .NET 6.0 | .NET 6.0 | SHORT | 505.46 | 9.457 | 8.846 | 501.08 |
| RemoveInPlaceCharArray | .NET 6.0 | .NET 6.0 | SHORT | 56.17 | 1.601 | 4.516 | 55.45 |
| RemoveInPlaceCharArray | .NET 7.0 | .NET 7.0 | SHORT | 59.29 | 1.221 | 1.751 | 58.68 |
| RemoveInPlaceCharArray | .NET 6.0 | .NET 6.0 | SHORT | 122.86 | 2.435 | 2.158 | 122.7 |
| RemoveLinq | .NET 6.0 | .NET 6.0 | SHORT | 195.59 | 3.962 | 6.05 | 195.68 |
| RemoveLinq | .NET 7.0 | .NET 7.0 | SHORT | 205.01 | 4.172 | 9.502 | 203.98 |
| RemoveLinq | .NET 6.0 | .NET 6.0 | SHORT | 208.75 | 6.48 | 17.74 | 204.15 |
| RemoveLinqNativeCharIsWhitespace | .NET 7.0 | .NET 7.0 | SHORT | 158.74 | 5.834 | 17.11 | 152.9 |
| RemoveLinqNativeCharIsWhitespace | .NET 7.0 | .NET 7.0 | SHORT | 202.84 | 4.001 | 3.742 | 201.46 |
| RemoveLinqNativeCharIsWhitespace | .NET Core 3.1 | .NET Core 3.1 | SHORT | 238.61 | 3.902 | 3.258 | 237.57 |
| RemoveRegex | .NET 7.0 | .NET 7.0 | SHORT | 96.39 | 2.538 | 7.282 | 95.44 |
| RemoveRegex | .NET 6.0 | .NET 6.0 | SHORT | 145 | 6.99 | 19.829 | 138.66 |
| RemoveRegex | .NET Core 3.1 | .NET Core 3.1 | SHORT | 1,007.07 | 19.556 | 25.429 | 1,004.26 |
| RemoveRegexCompiled | .NET Core 3.1 | .NET Core 3.1 | SHORT | 155.61 | 3.025 | 3.483 | 154.43 |
| RemoveRegexCompiled | .NET Core 3.1 | .NET Core 3.1 | SHORT | 156.72 | 3.024 | 3.361 | 156.18 |
| RemoveRegexCompiled | .NET Core 3.1 | .NET Core 3.1 | SHORT | 1,199.94 | 22.786 | 22.379 | 1,189.29 |
| RemoveStringReader | .NET Core 3.1 | .NET Core 3.1 | SHORT | 53.05 | 1.105 | 1.085 | 52.72 |
| RemoveStringReader | .NET 6.0 | .NET 6.0 | SHORT | 275.37 | 4.94 | 5.49 | 274.29 |
| RemoveStringReader | .NET 7.0 | .NET 7.0 | SHORT | 550.95 | 9.963 | 8.832 | 548.1 |
| StringSplitThenJoin | .NET 6.0 | .NET 6.0 | SHORT | 196.58 | 3.864 | 4.134 | 195.19 |
| StringSplitThenJoin | .NET 7.0 | .NET 7.0 | SHORT | 214.77 | 5.211 | 15.035 | 209.56 |
| StringSplitThenJoin | .NET Core 3.1 | .NET Core 3.1 | SHORT | 246.52 | 2.866 | 2.541 | 247.07 |

Long Text

| Method | Job | Runtime | name | Mean | Error | StdDev | Median |
| --- | --- | --- | --- | --- | --- | --- | --- |
| RemoveForLoop | .NET 7.0 | .NET 7.0 | LONG | 9,534.81 | 187.211 | 229.912 | 9,450.11 |
| RemoveForLoop | .NET 6.0 | .NET 6.0 | LONG | 9,574.58 | 170.786 | 250.336 | 9,560.20 |
| RemoveForLoop | .NET Core 3.1 | .NET Core 3.1 | LONG | 10,212.44 | 200.69 | 187.725 | 10,218.33 |
| RemoveInPlaceCharArray | .NET 6.0 | .NET 6.0 | LONG | 795.89 | 15.824 | 26 | 789.73 |
| RemoveInPlaceCharArray | .NET 7.0 | .NET 7.0 | LONG | 860.86 | 17.298 | 26.416 | 851.84 |
| RemoveInPlaceCharArray | .NET Core 3.1 | .NET Core 3.1 | LONG | 865.46 | 29.865 | 85.206 | 842.39 |
| RemoveLinq | .NET 7.0 | .NET 7.0 | LONG | 2,127.38 | 38.694 | 48.936 | 2,110.64 |
| RemoveLinq | .NET 6.0 | .NET 6.0 | LONG | 2,131.09 | 38.736 | 54.302 | 2,113.29 |
| RemoveLinq | .NET Core 3.1 | .NET Core 3.1 | LONG | 2,467.73 | 31.776 | 24.809 | 2,475.30 |
| RemoveLinqNativeCharIsWhitespace | .NET 6.0 | .NET 6.0 | LONG | 2,090.81 | 19.523 | 15.242 | 2,085.87 |
| RemoveLinqNativeCharIsWhitespace | .NET 7.0 | .NET 7.0 | LONG | 2,162.54 | 40.383 | 49.594 | 2,144.46 |
| RemoveLinqNativeCharIsWhitespace | .NET Core 3.1 | .NET Core 3.1 | LONG | 2,251.41 | 43.37 | 46.405 | 2,234.34 |
| RemoveRegex | .NET 6.0 | .NET 6.0 | LONG | 8,029.84 | 148.297 | 123.835 | 8,014.60 |
| RemoveRegex | .NET 7.0 | .NET 7.0 | LONG | 8,064.66 | 153.11 | 150.375 | 8,044.91 |
| RemoveRegex | .NET Core 3.1 | .NET Core 3.1 | LONG | 23,199.52 | 297.498 | 232.267 | 23,290.44 |
| RemoveRegexCompiled | .NET 6.0 | .NET 6.0 | LONG | 3,673.86 | 58.893 | 49.179 | 3,665.01 |
| RemoveRegexCompiled | .NET 7.0 | .NET 7.0 | LONG | 3,858.84 | 74.988 | 80.236 | 3,834.91 |
| RemoveRegexCompiled | .NET Core 3.1 | .NET Core 3.1 | LONG | 20,208.25 | 302.477 | 268.138 | 20,191.36 |
| RemoveStringReader | .NET 7.0 | .NET 7.0 | LONG | 1,239.48 | 22.25 | 23.807 | 1,234.84 |
| RemoveStringReader | .NET 6.0 | .NET 6.0 | LONG | 2,207.21 | 42.806 | 57.144 | 2,186.48 |
| RemoveStringReader | .NET Core 3.1 | .NET Core 3.1 | LONG | 2,925.74 | 54.859 | 106.999 | 2,893.67 |
| StringSplitThenJoin | .NET 7.0 | .NET 7.0 | LONG | 2,860.38 | 48.188 | 42.717 | 2,849.34 |
| StringSplitThenJoin | .NET 6.0 | .NET 6.0 | LONG | 2,872.59 | 56.242 | 75.082 | 2,846.50 |
| StringSplitThenJoin | .NET Core 3.1 | .NET Core 3.1 | LONG | 3,833.31 | 59.144 | 63.283 | 3,844.23 |

test table
content 2


Published

24 November 2022