Feeds:
Posts
Comments

Posts Tagged ‘C#’

For various projects I’ve had a need to determine how similar two images are. Some of my projects do this for hundreds of thousands of image pairs. This means I want it to work fast. This is a survey of the methods I’ve tried.

We’re going to determine how similar the following two images are.

Here’s the test harness:

    public class Test
    {
        private const int RunCount = 10000;
        private const string PathToImage1 = @"..\Release\240px-Mona_Lisa-restored.jpg";
        private const string PathToImage2 = @"..\Release\image_100138.png";

        public void TimeGettingDifference(Func<Bitmap, Bitmap, int> getDifference)
        {
            var image1 = new Bitmap(PathToImage1);
            var image2 = new Bitmap(PathToImage2);

            Assert.AreEqual(image1.GetBytesFromBitmap().Length, image2.GetBytesFromBitmap().Length);

            var stopwatch = new Stopwatch();
            stopwatch.Start();

            int difference = 0;
            for (int i = 0; i < RunCount; i++)
            {
                difference = getDifference(image1, image2);
            }
            stopwatch.Stop();
            var elapsedSeconds = stopwatch.Elapsed.TotalSeconds;
            Console.WriteLine("Average over "+RunCount+" runs: "+(elapsedSeconds/RunCount)+" seconds - "+difference);
        }
    }

This makes it easy to try different methods. All we have to do is pass in a function that takes two bitmaps and returns an integer representing the sum of the byte differences between the two Bitmaps.

Version 1

        public void Get_bytes_and_use_linq_to_sum_differences()
        {
            Func<Bitmap, Bitmap, int> getDifference = (image1, image2) =>
                {
                    var image1Bytes = image1.GetBytesFromBitmap();
                    var image2Bytes = image2.GetBytesFromBitmap();

                    return Enumerable.Range(0, image1Bytes.Length)
                        .Select(index => Math.Abs(image1Bytes[index] - image2Bytes[index]))
                        .Sum();

                };
            TimeGettingDifference(getDifference);
        }

    public static class BitmapExtensions
    {
        public static byte[] GetBytesFromBitmap(this Bitmap bitmap)
        {
            var rectangle = new System.Drawing.Rectangle(0, 0, bitmap.Width, bitmap.Height);
            var bData = bitmap.LockBits(rectangle, ImageLockMode.ReadWrite, bitmap.PixelFormat);
            byte[] data;
            try
            {
                int size = bData.Stride * bData.Height;
                data = new byte[size];
                Marshal.Copy(bData.Scan0, data, 0, size);
            }
            finally
            {
                bitmap.UnlockBits(bData);
            }
            return data;
        }
    }

result:

Average over 10000 runs: 0.00593082636 seconds - 5282185

Version 2

        public void Get_bytes_and_use_unsafe_loop_to_sum_differences()
        {
            Func<Bitmap, Bitmap, int> getDifference = (image1, image2) =>
                {
                    var image1Bytes = image1.GetBytesFromBitmap();
                    var image2Bytes = image2.GetBytesFromBitmap();

                    return GetDifferencesWithBytes(image1Bytes, image2Bytes);

                };
            TimeGettingDifference(getDifference);
        }

        public static int GetDifferencesWithBytes(byte[] image1Bytes, byte[] image2Bytes)
        {
            int size = image1Bytes.Length;
            int difference = 0;
            unsafe
            {
                fixed (byte* image1Ptr = image1Bytes, image2Ptr = image2Bytes)
                {
                    byte* ptrImage1Byte = image1Ptr, ptrImage2Byte = image2Ptr;
                    for (; size > 0; size--)
                    {
                        if (*ptrImage1Byte > *ptrImage2Byte)
                        {
                            difference += (*ptrImage1Byte - *ptrImage2Byte);
                        }
                        else
                        {
                            difference += (*ptrImage2Byte - *ptrImage1Byte);
                        }

                        ptrImage1Byte++;
                        ptrImage2Byte++;
                    }
                }
            }

            return difference;
        }

result:

Average over 10000 runs: 0.00107074605 seconds - 5282185

Version 3

        public void Pin_bytes_and_use_unsafe_byte_loop_to_sum_differences()
        {
            TimeGettingDifference(LockBitsAndGetDifferenceWithBytes);
        }

        public int LockBitsAndGetDifferenceWithBytes(Bitmap image1, Bitmap image2)
        {
            var rectangle = new System.Drawing.Rectangle(0, 0, image1.Width, image1.Height);
            var image1Data = image1.LockBits(rectangle, ImageLockMode.ReadWrite, image1.PixelFormat);
            var image2Data = image2.LockBits(rectangle, ImageLockMode.ReadWrite, image2.PixelFormat);
            int difference = 0;
            try
            {
                int size = image1Data.Stride * image1Data.Height;
                unsafe
                {
                    var image1Ptr = (byte*)(image1Data.Scan0.ToPointer());
                    var image2Ptr = (byte*)(image2Data.Scan0.ToPointer());

                    byte* ptrImage1Byte = image1Ptr, ptrImage2Byte = image2Ptr;
                    for (int len = size; len > 0; len--)
                    {
                        if (*ptrImage1Byte > *ptrImage2Byte)
                        {
                            difference += (*ptrImage1Byte - *ptrImage2Byte);
                        }
                        else
                        {
                            difference += (*ptrImage2Byte - *ptrImage1Byte);
                        }

                        ptrImage1Byte++;
                        ptrImage2Byte++;
                    }
                }
            }
            finally
            {
                image1.UnlockBits(image1Data);
                image2.UnlockBits(image2Data);
            }
            return difference;
        }

result:

Average over 10000 runs: 0.00081027626 seconds - 5282185

Version 4

        [Test]
        public void Pin_bytes_and_use_unsafe_int_loop_to_sum_differences()
        {
            TimeGettingDifference(LockBitsAndGetDifferenceWithIntegers);
        }

        public int LockBitsAndGetDifferenceWithIntegers(Bitmap image1, Bitmap image2)
        {
            var rectangle = new System.Drawing.Rectangle(0, 0, image1.Width, image1.Height);
            var image1Data = image1.LockBits(rectangle, ImageLockMode.ReadWrite, image1.PixelFormat);
            var image2Data = image2.LockBits(rectangle, ImageLockMode.ReadWrite, image2.PixelFormat);
            int difference = 0;
            try
            {
                int size = image1Data.Stride * image1Data.Height;
                int len = size/4;
                unsafe
                {
                    var image1Ptr = (int*)(image1Data.Scan0.ToPointer());
                    var image2Ptr = (int*)(image2Data.Scan0.ToPointer());

                    int* ptrImage1Byte = image1Ptr, ptrImage2Byte = image2Ptr;
                    for (; len > 0; len--)
                    {
                        var a1 = (byte)(*ptrImage1Byte >> 24);
                        var b1 = (byte)(*ptrImage2Byte >> 24);
                        difference += a1 > b1 ? a1 - b1 : b1 - a1;
                        a1 = (byte)(*ptrImage1Byte >> 16);
                        b1 = (byte)(*ptrImage2Byte >> 16);
                        difference += a1 > b1 ? a1 - b1 : b1 - a1;
                        a1 = (byte)(*ptrImage1Byte >> 8);
                        b1 = (byte)(*ptrImage2Byte >> 8);
                        difference += a1 > b1 ? a1 - b1 : b1 - a1;
                        a1 = (byte)(*ptrImage1Byte);
                        b1 = (byte)(*ptrImage2Byte);
                        difference += a1 > b1 ? a1 - b1 : b1 - a1;
                        ptrImage1Byte++;
                        ptrImage2Byte++;
                    }
                }
            }
            finally
            {
                image1.UnlockBits(image1Data);
                image2.UnlockBits(image2Data);
            }
            return difference;
        }

result:

Average over 10000 runs: 0.00086579786 seconds - 5282185

So far Version 3 is the fastest. One caveat with Versions 3 and 4 is both images must have the same stride.

Do you know of a faster way to do it without using multiple processors?

Read Full Post »

Gmail requires SMTP connections to be secure. If you try to use log4net‘s SmtpAppender to connect to the Gmail smtp server and get an error like the following:

log4net:ERROR [SmtpAppender] Error occurred while sending e-mail notification.
System.Net.Mail.SmtpException: The SMTP server requires a secure connection or the client was not authenticated. The server response was: 5.5.1 Authentication Required. Learn more at
   at System.Net.Mail.MailCommand.CheckResponse(SmtpStatusCode statusCode, String response)
   at System.Net.Mail.MailCommand.Send(SmtpConnection conn, Byte[] command, String from)
   at System.Net.Mail.SmtpTransport.SendMail(MailAddress sender, MailAddressCollection recipients, String deliveryNotify, SmtpFailedRecipientException& exception)
   at System.Net.Mail.SmtpClient.Send(MailMessage message)
   at log4net.Appender.SmtpAppender.SendEmail(String messageBody)

the most common cause is failure to add the authentication line to your configuration:

<appender name="SmtpAppender" type="log4net.Appender.SmtpAppender">
	<to value="foo@bar.com" />
	<from value="baz@bar.com" />
	<subject value="Some subject" />
	<smtpHost value="smtp.gmail.com" />
	<authentication value="Basic" />
...

If you try to connect to gmail without SSL you will receive a message like the following:

log4net:ERROR [SmtpAppender] Error occurred while sending e-mail notification.
System.Net.Mail.SmtpException: The SMTP server requires a secure connection or the client was not authenticated. The server response was: 5.7.0 Must issue a STARTTLS command first. o47sm1936887yhn.72
   at System.Net.Mail.MailCommand.CheckResponse(SmtpStatusCode statusCode, String response)
   at System.Net.Mail.MailCommand.Send(SmtpConnection conn, Byte[] command, String from)
   at System.Net.Mail.SmtpTransport.SendMail(MailAddress sender, MailAddressCollection recipients, String deliveryNotify, SmtpFailedRecipientException& exception)
   at System.Net.Mail.SmtpClient.Send(MailMessage message)
   at log4net.Appender.SmtpAppender.SendEmail(String messageBody)

In this case the problem is the latest official release of log4net (version 1.2.10 at the time of this posting) does not provide a way to configure the SmtpAppender to send mail using SSL.

It turns out a fix for this problem was checked in to the log4Net subversion repository in June 2008 but no official build contains the fix.

One solution is to get the latest log4net source code from its subversion repository, compile for .Net 2.0, and you’re in business… but that might cause you other headaches if they unexpectedly changed something else that you were using.

The alternative is to download the source of the latest public release (1.2.10) and patch it yourself as follows:

The fix involves making two small changes to the log4net code, and adding a corresponding value to your log4net configuration.

1) download the source code from the Apache web site
2) unzip it
3) open the sln file (under src) with Visual Studio
4) find the SmtpAppender class (under log4net/Appender)
5) search in that class for “public MailPriority Priority”
6) add the following new property after that it:

public bool EnableSsl { get; set; }

7) search for “smtpClient.Send(mailMessage)”

Notice that this segment of code is grayed-out. That’s because the project has a conditional compilation symbol named NET_1_0 that is enabled by default and this code segment is enclosed in a conditional compilation symbol named NET_2_0. We need to enable this segment of code. To do that:

7a) right click on the log4net project in the solution explorer and select “Properties”
7b) change the conditional compilation symbol version

screen shot of changing the compilation symbol to NET_2_0

7c) close the properties tab
7d) the code segment should no longer appear grayed-out
7e) above the “smtpClient.Send” line you searched for add the following:

	smtpClient.EnableSsl = EnableSsl;

8 ) when you try to compile you’ll get an error in XmlConfigurator related to System.Configuration.ConfigurationManager. This is because the project does not have a reference to the System.Configuration assembly. Add the reference.

9) you’ll also get an error related to a missing NUnit reference in the test project. You can add one and run the tests – several fail by the way but none are related to the change we made, they were checked in that way. Seriously. Anyway, you could also skip that part and compile only the log4net project.

10) replace the log4net assembly in your project with the one from build\bin\net\1.0\debug

11) update the SmtpAppender segment of your app.config to include the EnableSsl setting

<appender name="SmtpAppender" type="log4net.Appender.SmtpAppender">
	<to value="foo@bar.com" />
	<from value="baz@bar.com" />
	<subject value="Some subject" />
	<smtpHost value="smtp.gmail.com" />
	<authentication value="Basic" />
	<port value="587" />
	<username value="gmail user name" />
	<password value="gmail password" />
	<bufferSize value="1" />
	<EnableSsl value="true"/>
	<lossy value="true" />
	<evaluator type="log4net.Core.LevelEvaluator">
		<threshold value="ERROR"/>
	</evaluator>
	<layout type="log4net.Layout.PatternLayout">
		<conversionPattern value="%newline%date [%thread] %-5level %logger [%property{NDC}] - %message%newline%newline%newline" />
	</layout>
</appender>

12) recompile your project if necessary
13) trigger whatever it is that you expect to cause something to be logged to log4net and it should work.

If it still doesn’t work, make sure you have a reference to SmtpAppender in one of your configured log4net categories, and/or in the root category.

<root>
    <level value="WARN"/>
    <appender-ref ref="RollingLogFileAppender"/>
    <appender-ref ref="SmtpAppender"/>
</root>

And that the log level you are using in your code is at least as high as the highest one configured in the SmtpAppender and related category.

...
	<lossy value="true" />
	<evaluator type="log4net.Core.LevelEvaluator">
		<threshold value="ERROR"/>
	</evaluator>
	<layout type="log4net.Layout.PatternLayout">
		<conversionPattern value="%newline%date [%thread] %-5level %logger [%property{NDC}] - %message%newline%newline%newline" />
	</layout>
</appender>
<root>
    <level value="WARN"/>
    <appender-ref ref="RollingLogFileAppender"/>
    <appender-ref ref="SmtpAppender"/>
</root>

Read Full Post »

In Part 2 of this series we improved our genetic solver to handle the 8 Queens puzzle. Now let’s look at a more useful problem, that of route optimization. Imagine we are running a package delivery business. We’d want to minimize costs so that we could maximize profits. Wages are a fixed cost so we can’t eliminate them but we can try to get the most out of them by being efficient about assigning areas to drivers. Maybe one driver delivers to North side addresses and another to South side addresses. Fuel is the next highest cost so we may want to maximize our miles per gallon by considering terrain, road construction, traffic patterns, and distance between stops. Should we send out larger, full trucks and incur the cost of moving all that weight from stop to stop, or would it be more efficient to distribute loads among more trucks? And what about high priority packages? These are just a few of the parameters that one might have to weigh and optimize when running such a business. We can summarize the problem as follows:

Find the route that visits all locations such that rewards are maximized and costs are minimized

This is also known as the Travelling Salesman Problem and there are many ways to solve it. One difficulty with this problem is it is hard for a person to look at a collection of points connected by lines with costs and verify that it represents the shortest route or most efficient route.

XKCD comic for the Travelling Salesman problem

Let’s carefully construct our scenario so that we can visually check our solver’s progress towards an expected solution. We’ll do that by laying out our points in a circle.

......................*.................
........................................
..................b...a.................
..............c.........................
........................................
..........d...................z.........
........................................
.......e.........................y......
........................................
........................................
........................................
....f...............................x...
........................................
........................................
........................................
..g...................................w.
........................................
........................................
........................................
..h...................................v.
........................................
........................................
........................................
..i...................................u.
........................................
........................................
........................................
...j.................................t..
........................................
........................................
........................................
.....k.............................s....
........................................
........................................
........l.......................r.......
........................................
............m...............q...........
........................................
................n.......p...............
....................o...................

* => home base

This will let us visualize how close our genetic solution is to the expected solution.

We’ll start off with the final code from the previous part in this series.

First put the points on the circle such that there is only one correct path:

    public class RouteOptimizationSolver
    {
        private const string GeneSet = "abcdefghijklmnopqrstuvwxyz";
        private const int Radius = 20;

        public string Solve()
        {

            int pointOffset = 0;
            var pointLookup = GetNPointsOnCircle(new Point(Radius, Radius), Radius, GeneSet.Length)
                .ToDictionary(x => GeneSet[pointOffset++], x => x);

            //...
        }

        public IList<Point> GetNPointsOnCircle(Point center, int radius, int n)
        {
            double alpha = Math.PI*2/(n+1);
            var points = new List<Point>(n);
            var maxArrayIndex = 2 * radius - 1;
            Func<int, int> forceInBounds = x => Math.Min(Math.Max(0, x), maxArrayIndex);
            int i = n/2;
            while (i < 3*n/2)
            {
                double theta = alpha*i++;
                int x = (int) (Math.Cos(theta)*radius);
                int y = (int) (Math.Sin(theta)*radius);
                var pointOnCircle = new Point(forceInBounds(center.X + x), forceInBounds(center.Y + y));
                points.Add(pointOnCircle);

            }

            return points;
        }
    }

Our gene set will be the letters [a-z] associated with the points on the map. The gene sequence, in the order generated, will be the route. The fitness function will calculate the distance between the points, including from home base to the first point, and back to home base from the final point. Also, we’ll incorporate what we learned from the 8 Queens puzzle by adding a penalty for revisiting locations.

        private static int CalculateRouteLength(
            IEnumerable<char> sequence,
            IDictionary<char, Point> pointLookup)
        {
            var points = sequence.Select(x => pointLookup[x]).ToList();
            int distinctCount = points.Distinct().Count();
            var home = new Point(0, pointLookup['a'].Y);
            points.Insert(0, home);
            points.Add(home);
            double routeLength = points
                .Select((x, i) => i == 0 ? 0 : GetDistance(x, points[i - 1]))
                .Sum();
            int fitness =
                1000 * (pointLookup.Count - distinctCount)
                + (int)Math.Floor(routeLength);
            return fitness;
        }

        private static double GetDistance(Point pointA, Point pointB)
        {
            int sideA = pointA.X - pointB.X;
            int sideB = pointA.Y - pointB.Y;
            double sideC = Math.Sqrt(sideA * sideA + sideB * sideB);
            return sideC;
        }

When we move the display function out to a helper method the Solve method becomes short and fairly easy to read.

        public void Solve()
        {
            int pointOffset = 0;
            var pointLookup = GetNPointsOnCircle(new Point(Radius, Radius), Radius, GeneSet.Length)
                .ToDictionary(x => GeneSet[pointOffset++], x => x);

            Console.WriteLine("Expect optimal route "+GeneSet+" to have fitness "+CalculateRouteLength(GeneSet, pointLookup));

            var stopwatch = new Stopwatch();
            stopwatch.Start();

            string result = new GeneticSolver().GetBest(
                GeneSet.Length,
                GeneSet,
                sequence => CalculateRouteLength(sequence, pointLookup),
                (generation, fitness, genes) => Display(generation, fitness, genes, stopwatch));
            Console.WriteLine(result);

            return result;
        }

        private static void Display(int generation, int fitness, string sequence, Stopwatch stopwatch)
        {
            Console.WriteLine("generation {0} fitness {1} {2}  elapsed: {3}",
                  generation.ToString().PadLeft(5, ' '),
                  fitness.ToString().PadLeft(5, ' '),
                  sequence,
                  stopwatch.Elapsed);
        }

That’s it, let’s run it and see what happens.

Expect optimal route abcdefghijklmnopqrstuvwxyz to have fitness 122
generation     1 fitness 12685 kwryvdvyxanwvergavradqunqs  elapsed: 00:00:00.0262491
generation     1 fitness  7654 zesedmqncbeucbzdtwlkyhpvyj  elapsed: 00:00:00.0293974
generation     1 fitness  7651 vjcnurjhleecqbdzxpvxbgswja  elapsed: 00:00:00.0295576
generation     1 fitness  7637 xxmnztreelluapcykhaywddyov  elapsed: 00:00:00.0299583
generation     1 fitness  7606 gwuvqldatidupokbkivjmpmexy  elapsed: 00:00:00.0300911
generation     1 fitness  6633 gwuvqldatidupokzrivjmpmexy  elapsed: 00:00:00.0302366
generation     2 fitness  6603 gwuvqldatidupokzryvjmpmexy  elapsed: 00:00:00.0315727
generation     3 fitness  5604 gwuvqlhatidupokzryvjmpmexy  elapsed: 00:00:00.0322540
generation     5 fitness  5587 gwuvqldatidupokzryvjmnmexy  elapsed: 00:00:00.0338223
generation     5 fitness  5569 gwuvqldatidupokzrihjmpmexy  elapsed: 00:00:00.0341191
generation     8 fitness  4574 gwuvqldatidcpokzryvjmnmexy  elapsed: 00:00:00.0362104
generation    11 fitness  4544 gwuvqldatidcpokzbyvjmnmexy  elapsed: 00:00:00.0381409
generation    15 fitness  3589 gwuhqldatidcpokzbyvjmnmexy  elapsed: 00:00:00.0408708
generation    19 fitness  3566 gwuvqldatidcpokzryhjmnmexy  elapsed: 00:00:00.0437878
generation    21 fitness  3543 gwuvqldatidcpokzrohjmnmexy  elapsed: 00:00:00.0454392
generation    23 fitness  3538 gwuvqbdatidcpokzrohjmnmexy  elapsed: 00:00:00.0462299
generation    36 fitness  3521 gwuvqbdatidcpokzrohjmnmsxy  elapsed: 00:00:00.0558536
generation    40 fitness  2584 gwuvqbdatiscpokzrohjmnmexy  elapsed: 00:00:00.0599894
generation    45 fitness  2560 gwuvqbdatilcpokzrohjmnmexy  elapsed: 00:00:00.0624861
generation    47 fitness  2521 gwuvqbdatidcpokzrohjlnmsxy  elapsed: 00:00:00.0637021
generation    60 fitness  2516 gwuvqbdatidcpokzrhhjlnmsxy  elapsed: 00:00:00.0724209
generation    80 fitness  2487 gwuvqbdafidcpokzrohjlnmsxy  elapsed: 00:00:00.0821201
generation    83 fitness  2486 gwuvqbdafidcpokzrohjlnmtxy  elapsed: 00:00:00.0836240
generation    86 fitness  1522 gwuvqfdatiecpokzrohjlnmsxy  elapsed: 00:00:00.0851899
generation    90 fitness  1514 gwuvqfdatbecpokzrohjlnmsxy  elapsed: 00:00:00.0883483
generation   112 fitness  1509 gwuvqfdatbecpokzrrhjlnmsxy  elapsed: 00:00:00.0992485
generation   122 fitness  1507 gwuvifdatbecpokzrohjlnmsxy  elapsed: 00:00:00.1052185
generation   158 fitness  1505 gwuvifdatbecpokzqohjlnmsxy  elapsed: 00:00:00.1234463
generation   165 fitness  1504 gwuvifdatbecpokzqphjlnmsxy  elapsed: 00:00:00.1295294
generation   181 fitness   515 gwuvifdatbecpqkzrohjlnmsxy  elapsed: 00:00:00.1379326
generation   197 fitness   511 gwuvifdatbecqpkzrohjlnmsxy  elapsed: 00:00:00.1456009
generation   355 fitness   504 gwuvifdatbecpokzrqhjlnmsxy  elapsed: 00:00:00.2126361

The location penalty worked great but once the duplicates were gone we made no progress. Why? We only have two genetic strategies Crossover and Mutation, neither of which can help us beyond this point. Mutation replaces a gene with a random one and Crossover copies genes from one parent to another but we already have all the genes, they’re just in the wrong order. Ah! That’s the key to solving this problem, order matters! It mattered with the Hello World! problem too but in that case we allowed duplication. In the 8 Queens puzzle position mattered but order didn’t. To solve this problem we need a new strategy, one that affects gene order.

Before we add the new strategy let’s think about how we’re going to fit it in.

Should it come after Crossover or before? Maybe it should come after Mutation. The power of genetic systems, as we’ve seen, comes from their randomness. Instead of hard coding the order in which we apply strategies, let’s let the characteristics of the problem being solved decide which one we should check first. If Mutation is giving us improvements, go with that first, for example. Also, a rule of refactoring will help us here. When you find yourself doing something three times, refactor to a pattern. We have three strategies at the moment and we may add more later, let’s pull out a pattern.

    public interface IGeneticStrategy
    {
        string Description { get; }
        Individual CreateChild(Individual parentA, Individual parentB, string geneSet);
    }

We can put common things in an abstract base class

    public abstract class AbstractStrategy
    {
        protected readonly Random Random;
        private string _description;

        protected AbstractStrategy()
        {
            Random = new Random();
        }

        public string Description
        {
            get
            {
                if (_description == null)
                {
                    _description = GetType().Name.Replace("Strategy", "");
                }
                return _description;
            }
        }
    }

and create classes from each of our strategy methods.

    public class CrossoverStrategy : AbstractStrategy, IGeneticStrategy
    {
        public Individual CreateChild(Individual parentA, Individual parentB, string geneSet)
        {
            int crossOverPoint = Random.Next(parentA.Genes.Length);
            var childGenes = parentA.Genes.ToCharArray();
            var parentBGenes = parentB.Genes.ToCharArray();
            childGenes[crossOverPoint] = parentBGenes[crossOverPoint];
            var child = new Individual
                {
                    Genes = new String(childGenes),
                };
            return child;
        }
    }

    public class MutationStrategy : AbstractStrategy, IGeneticStrategy
    {
        public Individual CreateChild(Individual parentA, Individual parentB, string geneSet)
        {
            var parentGenes = parentA.Genes.ToCharArray();
            int location = Random.Next(0, parentGenes.Length);
            var geneNumber = Random.Next(0, geneSet.Length);
            parentGenes[location] = geneSet[geneNumber];
            return new Individual
                {
                    Genes = new String(parentGenes),
                };
        }
    }

As a bonus, this refactoring helps us move towards code that complies with the Open-Closed Principle. To avoid hard coding the strategies we’ll get them through reflection in the constructor – these could also be injected through IoC of course. We’ll also need a default strategy to associate with the first set of parents. We’ll migrate GenerateSequence() to this fourth strategy class and call it RandomStrategy.

    public class GeneticSolver
    {
        private readonly Random _random = new Random();
        private readonly IGeneticStrategy[] _strategies;
        private RandomStrategy _randomStrategy;

        public GeneticSolver()
        {
            _strategies = (from t in Assembly.GetExecutingAssembly().GetTypes()
                           where t.GetInterfaces().Contains(typeof(IGeneticStrategy))
                           where t.GetConstructor(Type.EmptyTypes) != null
                           select Activator.CreateInstance(t) as IGeneticStrategy).ToArray();
        }

        private IEnumerable<Individual> GenerateParents(string geneSet)
        {
            Func<Individual> next = () => _randomStrategy.CreateChild(null, null, geneSet);
            var initial = next();
            return initial.Generate(next);
        }

    public class RandomStrategy : AbstractStrategy, IGeneticStrategy
    {
        private readonly int _length;

        public RandomStrategy(int length)
        {
            _length = length;
        }

        public Individual CreateChild(Individual parentA, Individual parentB, string geneSet)
        {
            return new Individual
                {
                    Genes = GenerateSequence(geneSet),
                };
        }

        private string GenerateSequence(string geneSet)
        {
            Func<char> next = () => geneSet[Random.Next(0, geneSet.Length)];
            char initial = next();
            return new String(initial.Generate(next).Take(_length).ToArray());
        }
    }

and recreate the RandomStrategy instance whenever GetBest() is called.

        public string GetBest(int length,
                              string geneSet,
                              Func<string, int> getFitness,
                              Action<int, int, string> displayChild)
        {
            _randomStrategy = new RandomStrategy(length);

Now we can use a strategy until it fails to provide improvement, then pick a different strategy randomly.

        public string GetBest(int length,
                              string geneSet,
                              Func<string, int> getFitness,
                              Action<int, int, string> displayChild)
        {
            _randomStrategy = new RandomStrategy(length);

            int maxIndividualsInPool = geneSet.Length * 3;
            int generationCount = 1;
            var uniqueIndividuals = new HashSet<string>();
            var parents = GenerateParents(geneSet)
                .Where(x => uniqueIndividuals.Add(x.Genes))
                .Take(maxIndividualsInPool)
                .ToList();
            foreach (var individual in parents)
            {
                individual.Fitness = getFitness(individual.Genes);
            }
            parents = parents.OrderBy(x => x.Fitness).ToList();

            displayChild(generationCount, parents.Last().Fitness, parents.Last().Genes);

            int worstParentFitness = parents.Last().Fitness;

            IGeneticStrategy strategy = _randomStrategy;
            var children = GenerateChildren(parents, strategy, geneSet);
            do
            {
                var improved = new List<Individual>();
                foreach (var child in children.Where(x => uniqueIndividuals.Add(x.Genes)))
                {
                    child.Fitness = getFitness(child.Genes);
                    if (worstParentFitness >= child.Fitness)
                    {
                        improved.Add(child);
                        if (worstParentFitness > child.Fitness)
                        {
                            displayChild(generationCount, child.Fitness, child.Genes);
                            worstParentFitness = child.Fitness;
                        }
                    }
                }
                generationCount++;
                if (improved.Any())
                {
                    parents = parents
                        .Concat(improved)
                        .OrderBy(x => x.Fitness)
                        .Take(maxIndividualsInPool)
                        .ToList();
                    children = GenerateChildren(parents, strategy, geneSet);
                }
                else
                {
                    var previousStrategy = strategy;
                    strategy = _strategies
                        .Where(x => !ReferenceEquals(x, previousStrategy))
                        .Shuffle()
                        .First();
                    children = GenerateChildren(parents, strategy, geneSet);
                }
            } while (parents[0].Fitness > 0);
            return parents[0].Genes;
        }

Shuffle has the following implementation:

    public static class IEnumerableExtensions
    {
        private static readonly Random _rand = new Random();

        public static IEnumerable<T> Shuffle<T>(this IEnumerable<T> source)
        {
            var items = source == null ? new T[] { } : source.ToArray();

            for (int i = 0; i < items.Length; i++)
            {
                int toReturn = _rand.Next(i, items.Length);
                yield return items[toReturn];
                items[toReturn] = items[i];
            }
        }
    }

and GenerateChildren becomes

        private IEnumerable<Individual> GenerateChildren(
            IList<Individual> parents,
            IGeneticStrategy strategy,
            string geneSet)
        {
            int count = 0;
            while (count < parents.Count)
            {
                int parentAIndex = _random.Next(parents.Count);
                int parentBIndex = _random.Next(parents.Count);
                if (parentAIndex == parentBIndex)
                {
                    continue;
                }
                var parentA = parents[parentAIndex];
                var parentB = parents[parentBIndex];
                yield return strategy.CreateChild(parentA, parentB, geneSet);
                count++;
            }
        }

While we’re refactoring let’s add the name of the Strategy that generated the child to the child when we create it

    public class Individual
    {
        public string Genes;
        public int Fitness;
        public IGeneticStrategy Strategy;
    }

    public class CrossoverStrategy : AbstractStrategy, IGeneticStrategy
    {
        public Individual CreateChild(Individual parentA, Individual parentB, string geneSet)
        {
            int crossOverPoint = Random.Next(parentA.Genes.Length);
            var childGenes = parentA.Genes.ToCharArray();
            var parentBGenes = parentB.Genes.ToCharArray();
            childGenes[crossOverPoint] = parentBGenes[crossOverPoint];
            var child = new Individual
                {
                    Genes = new String(childGenes),
                    Strategy = this
                };
            return child;
        }
    }
	
    public class MutationStrategy : AbstractStrategy, IGeneticStrategy
    {
        public Individual CreateChild(Individual parentA, Individual parentB, string geneSet)
        {
            var parentGenes = parentA.Genes.ToCharArray();
            int location = Random.Next(0, parentGenes.Length);
            parentGenes[location] = geneSet[Random.Next(0, geneSet.Length)];
            return new Individual
                {
                    Genes = new String(parentGenes),
                    Strategy = this
                };
        }
    }	

    public class RandomStrategy : AbstractStrategy, IGeneticStrategy
    {
        private readonly int _length;

        public RandomStrategy(int length)
        {
            _length = length;
        }

        public Individual CreateChild(Individual parentA, Individual parentB, string geneSet)
        {
            return new Individual
                {
                    Genes = GenerateSequence(geneSet),
                    Strategy = this
                };
        }

        private string GenerateSequence(string geneSet)
        {
            Func<char> next = () => geneSet[Random.Next(0, geneSet.Length)];
            char initial = next();
            return new String(initial.Generate(next).Take(_length).ToArray());
        }
    }

Also, now that we know which strategy produced a child, let’s pass that to the display function too. It might be useful to know which strategies are most successful at various points while solving a problem.

        public string GetBest(int length,
                              string geneSet,
                              Func<string, int> getFitness,
                              Action<int, int, string, string> displayChild)
        {
            _randomStrategy = new RandomStrategy(length);

            int maxIndividualsInPool = geneSet.Length * 3;
            int generationCount = 1;
            var uniqueIndividuals = new HashSet<string>();
            var parents = GenerateParents(geneSet)
                .Where(x => uniqueIndividuals.Add(x.Genes))
                .Take(maxIndividualsInPool)
                .ToList();
            foreach (var individual in parents)
            {
                individual.Fitness = getFitness(individual.Genes);
            }
            parents = parents.OrderBy(x => x.Fitness).ToList();

            var worstParent = parents.Last();
            displayChild(generationCount,
                         worstParent.Fitness,
                         worstParent.Genes,
                         worstParent.Strategy.Description);

            int worstParentFitness = worstParent.Fitness;

            IGeneticStrategy strategy = _randomStrategy;
            var children = GenerateChildren(parents, strategy, geneSet);
            do
            {
                var improved = new List<Individual>();
                foreach (var child in children.Where(x => uniqueIndividuals.Add(x.Genes)))
                {
                    child.Fitness = getFitness(child.Genes);
                    if (worstParentFitness >= child.Fitness)
                    {
                        improved.Add(child);
                        if (worstParentFitness > child.Fitness)
                        {
                            displayChild(generationCount,
                                         child.Fitness,
                                         child.Genes,
                                         child.Strategy.Description);
                            worstParentFitness = child.Fitness;
                        }

This requires a corresponding change to our RouteOptimizationSolver.

        private static void Display(int generation, int fitness, string sequence, string strategy, Stopwatch stopwatch)
        {
            Console.WriteLine("generation {0} fitness {1} {2}  elapsed: {3} by {4}",
                              generation.ToString().PadLeft(5, ' '),
                              fitness.ToString().PadLeft(5, ' '),
                              sequence,
                              stopwatch.Elapsed,
                              strategy);
        }

        public string Solve()
        {
            int pointOffset = 0;
            var pointLookup = GetNPointsOnCircle(new Point(Radius, Radius), Radius, GeneSet.Length)
                .ToDictionary(x => GeneSet[pointOffset++], x => x);

            Console.WriteLine("Expect optimal route " + GeneSet + " to have fitness " + CalculateRouteLength(GeneSet, pointLookup));

            var stopwatch = new Stopwatch();
            stopwatch.Start();

            string result = new GeneticSolver().GetBest(
                GeneSet.Length,
                GeneSet,
                child => CalculateRouteLength(child, pointLookup),
                (generation, fitness, item, strategy) => Display(generation, fitness, item, strategy, stopwatch));
            Console.WriteLine(result);

            return result;
        }

Now we’re ready to add the new Strategy. Let’s call this strategy Swap because it picks two random locations and swaps their genes to produce a new child.

    public class SwapStrategy : AbstractStrategy, IGeneticStrategy
    {
        public Individual CreateChild(Individual parentA, Individual parentB, string geneSet)
        {
            int swapPointA = Random.Next(parentA.Genes.Length);
            int swapPointB = Random.Next(parentA.Genes.Length);
            if (swapPointA == swapPointB)
            {
                swapPointB = Random.Next(parentA.Genes.Length);
                if (swapPointA == swapPointB)
                {
                    return parentA;
                }
            }
            var childGenes = parentA.Genes.ToCharArray();
            char temp = childGenes[swapPointA];
            childGenes[swapPointA] = childGenes[swapPointB];
            childGenes[swapPointB] = temp;
            var child = new Individual
                {
                    Genes = new String(childGenes),
                    Strategy = this
                };
            return child;
        }
    }

We now have four strategies and we’re ready to run again.

Expect optimal route abcdefghijklmnopqrstuvwxyz to have fitness 122
generation     1 fitness 13783 sihfdgrydirararoylxlxdgrlt  elapsed: 00:00:00.0301138 by Random
generation     1 fitness 11716 tukovqkrpnoeqglrbnfallgrfo  elapsed: 00:00:00.0334027 by Swap
generation     1 fitness 11709 tukovqkrpqoenglrbnlalfgrfo  elapsed: 00:00:00.0336371 by Swap
generation     1 fitness  7630 wxqpgirnwdqujjkvjyzgskohdc  elapsed: 00:00:00.0337776 by Swap
generation     1 fitness  6692 xqvdhyvpabxhlokitjsczshgbm  elapsed: 00:00:00.0339528 by Swap
generation     1 fitness  6607 tjsrqlewbnmpvhzydtrfkrrcbd  elapsed: 00:00:00.0344058 by Swap
generation     2 fitness  6599 tjsrqlewknmpvhzydcrfbrrtbd  elapsed: 00:00:00.0353516 by Swap
generation     2 fitness  6589 tjsrqlewbnmpvhzybtrfkrrcdd  elapsed: 00:00:00.0356634 by Swap
generation     2 fitness  5657 vwnoefbjaaxzsjmzfqgrnpiytl  elapsed: 00:00:00.0359059 by Swap
generation     3 fitness  5626 vwnoefbjaaxzsjmzflgrnpiytq  elapsed: 00:00:00.0362412 by Swap
generation     4 fitness  5593 vwnoefbgaaxzsjmzfljrnpiytq  elapsed: 00:00:00.0368606 by Swap
generation     8 fitness  5577 vwnjefbgaaxzsjmzflornpiytq  elapsed: 00:00:00.0446378 by Swap
generation    13 fitness  4733 vwfoenbjaaxzskmzfqgrnpiytl  elapsed: 00:00:00.0477396 by Mutation
generation    14 fitness  4729 vwfoembjaaxzskmzfqgrnpiytl  elapsed: 00:00:00.0487204 by Mutation
generation    15 fitness  3675 vwfoedbjaaxzskmzfqgrnpiytl  elapsed: 00:00:00.0493017 by Mutation
generation    19 fitness  3662 vwfoedbjaaxzckmzfqgrnpiytl  elapsed: 00:00:00.0522872 by Crossover
generation    23 fitness  3628 vwfoedbjaaxzckmjfqgrnpiytl  elapsed: 00:00:00.0601834 by Mutation
generation    25 fitness  3613 vwfoedbjaaxzckmjfqgrnpihtl  elapsed: 00:00:00.0613455 by Crossover
generation    28 fitness  3580 vwxoedbjaaxzckmjfqgrnpihtl  elapsed: 00:00:00.0632559 by Mutation
generation    38 fitness  2658 vwfoedbjaaxuskmzfqgrnpiytl  elapsed: 00:00:00.0694580 by Crossover
generation    44 fitness  2576 vwuoedbjaaxzckmjfqgrnpihtl  elapsed: 00:00:00.0724551 by Mutation
generation    48 fitness  2570 vwuoedbaajxzckmjfqgrnpihtl  elapsed: 00:00:00.0742243 by Swap
generation    49 fitness  2561 vwuoedbaahxzckmjfqgrnpijtl  elapsed: 00:00:00.0750457 by Swap
generation    50 fitness  2553 vwuojdbaaexzckmjfqgrnpihtl  elapsed: 00:00:00.0756789 by Swap
generation    50 fitness  2529 vwuoedbaajxzckmjfqlrnpihtg  elapsed: 00:00:00.0763815 by Swap
generation    54 fitness  2523 vwuoedbacjxzakmjiqlrnpfhtg  elapsed: 00:00:00.0783492 by Swap
generation    57 fitness  2522 vwuxedbacjozakmjfqlrnpihtg  elapsed: 00:00:00.0802377 by Swap
generation    58 fitness  2514 vwuxedbacjqzakmjfolrnpihtg  elapsed: 00:00:00.0815207 by Swap
generation    60 fitness  1576 vwuoedbjayxzckmjfqgrnpihtl  elapsed: 00:00:00.0825285 by Crossover
generation    64 fitness  1559 vwuxedbacjozakmyfqlrnpihtg  elapsed: 00:00:00.0848008 by Mutation
generation    65 fitness  1538 vwuxedbacjozskmjfqlrnpihtg  elapsed: 00:00:00.0855306 by Mutation
generation    67 fitness  1519 vwuxedbacjozakmyfqlrnpthig  elapsed: 00:00:00.0863840 by Swap
generation    86 fitness  1513 wvuxedbacjozakmyfqlrnpthig  elapsed: 00:00:00.0975290 by Swap
generation    87 fitness  1499 wvuxedbacjkzaomyfqlrnpthig  elapsed: 00:00:00.0983235 by Swap
generation    93 fitness  1468 wvuxedbacjkzaomyflqrnpthig  elapsed: 00:00:00.1011205 by Swap
generation   104 fitness  1462 wvuxedbacjkzaoryfqlmnpthig  elapsed: 00:00:00.1070200 by Swap
generation   116 fitness  1461 wvuxedbacjkoazryfqlmnpthig  elapsed: 00:00:00.1126585 by Swap
generation   144 fitness  1459 wvuxedbacjkorzryfqmlnpthig  elapsed: 00:00:00.1273411 by Mutation
generation   147 fitness  1446 wvuxedbacjkqrzryfomlnpthig  elapsed: 00:00:00.1291495 by Swap
generation   148 fitness  1432 wvuxedbacjkorztyfqmlnprhig  elapsed: 00:00:00.1298990 by Swap
generation   153 fitness  1418 wvuxedbacjkqrzvyfomlnpthig  elapsed: 00:00:00.1323602 by Crossover
generation   174 fitness  1417 wvvxedbacjkqrzuyfomlnpthig  elapsed: 00:00:00.1456486 by Swap
generation   182 fitness  1403 wvvxydbacjkqrzuefomlnpthig  elapsed: 00:00:00.1489656 by Swap
generation   182 fitness  1390 wvufedbacjkqrzvyxomlnpthig  elapsed: 00:00:00.1494337 by Swap
generation   184 fitness   459 wzuxedbacjkosvryfqmlnpthig  elapsed: 00:00:00.1504152 by Crossover
generation   223 fitness   437 wzuxedbacjktsvryfqmlnpohig  elapsed: 00:00:00.1690261 by Swap
generation   261 fitness   429 wuzxedbacjktsvryfqmlnpohig  elapsed: 00:00:00.1861103 by Swap
generation   266 fitness   417 wuzxedbacjkfsvrytqmlnpohig  elapsed: 00:00:00.1888094 by Swap
generation   273 fitness   412 euzxwdbacjkfsvrytqmlnpohig  elapsed: 00:00:00.1930587 by Swap
generation   294 fitness   384 wuzfedbacjktsvryxqmlnpohig  elapsed: 00:00:00.2027860 by Swap
generation   437 fitness   364 wurfedbacjktsvzyxqmlnpohig  elapsed: 00:00:00.2605055 by Swap
generation   458 fitness   358 wurfedbacjktsvzyxqmlnpoihg  elapsed: 00:00:00.2684933 by Swap
generation   569 fitness   357 wurfedbacjktsvzyxqmlnopihg  elapsed: 00:00:00.3043946 by Swap
generation   576 fitness   352 wurfedbacjkstvzyxqmlopnihg  elapsed: 00:00:00.3073521 by Swap
generation   647 fitness   344 wutfedbacjkrsvzyxqmlnpoihg  elapsed: 00:00:00.3291617 by Swap
generation   648 fitness   343 wutfedbacjkrsvzyxqmlnopihg  elapsed: 00:00:00.3295613 by Swap
generation   655 fitness   340 wutfedbacjkrsvxyzqmlnpoihg  elapsed: 00:00:00.3324795 by Swap
generation   656 fitness   338 wuvfedbacjkrstxyzqmlnpoihg  elapsed: 00:00:00.3331220 by Swap
generation   743 fitness   335 wvufedbacjkrstxyzqmlnpoihg  elapsed: 00:00:00.3608762 by Swap
generation   926 fitness   333 fuvwedbacjkrstxyzqmlnpoihg  elapsed: 00:00:00.4152525 by Swap
generation  1189 fitness   331 zutfedbacjprsvwyxqmlnokihg  elapsed: 00:00:00.4800786 by Swap
generation  1224 fitness   329 zuvfedbacjprstwyxqmlnokihg  elapsed: 00:00:00.4869162 by Swap
generation  1283 fitness   319 zuvfedbacoprstwyxqmlnjkihg  elapsed: 00:00:00.4992418 by Swap
generation  1349 fitness   311 zuvfedbacoprstwyxqmlnkjihg  elapsed: 00:00:00.5149091 by Swap
generation  1389 fitness   302 zuvfedbacoprstwyxqnlmkjihg  elapsed: 00:00:00.5224549 by Swap
generation  1495 fitness   301 zuvfedbacoqrstwyxpnlmkjihg  elapsed: 00:00:00.5442969 by Swap
generation  1560 fitness   298 zxvfedbacoprstwyuqnlmkjihg  elapsed: 00:00:00.5566014 by Swap
generation  1604 fitness   293 fuvzedbacpqrstwyxonlmkjihg  elapsed: 00:00:00.5659772 by Swap
generation  1651 fitness   289 zxvfedbacoprstwyuqnmlkjihg  elapsed: 00:00:00.5749197 by Swap
generation  1678 fitness   279 zyvfedbacoprstwxuqnmlkjihg  elapsed: 00:00:00.5805424 by Swap
generation  1741 fitness   261 zxyfedabcoprstwvuqnmlkjihg  elapsed: 00:00:00.5934050 by Swap
generation  1949 fitness   260 zxyfedabcoprstuvwqnmlkjihg  elapsed: 00:00:00.6312049 by Swap
generation  2121 fitness   258 fxyzedabcoprstwvuqnmlkjihg  elapsed: 00:00:00.6587608 by Swap
generation  2226 fitness   257 fxyzcdabeoprstwvuqnmlkjihg  elapsed: 00:00:00.6763631 by Swap
generation  2280 fitness   249 fxyzadcbeoprstwvuqnmlkjihg  elapsed: 00:00:00.6841581 by Swap
generation  2450 fitness   245 bxyzadcfeoprstwvuqnmlkjihg  elapsed: 00:00:00.7112644 by Swap
generation  2607 fitness   232 fxyzabcdeoprstwvuqnmlkjihg  elapsed: 00:00:00.7360896 by Swap
generation  2725 fitness   231 fxyzabcdeoprstuvwqnmlkjihg  elapsed: 00:00:00.7582069 by Swap
generation  3314 fitness   230 exyzabcdfoprstuvwqnmlkjihg  elapsed: 00:00:00.8291939 by Swap
generation  4357 fitness   226 exyzabcdfpqrstuvwonmlkjihg  elapsed: 00:00:00.9286172 by Swap
generation  6196 fitness   225 dxyzabcefpqrstuvwonmlkjihg  elapsed: 00:00:01.0578164 by Swap

This time we make it a lot closer to optimal fitness but it still hangs up.

This change introduces the Swap strategy and that is useful finding better solutions to the Travelling Salesperson Problem but it simultaneously reduces the effectiveness of the genetic solver when applied to the 8 Queens Puzzle. The reason is the order in which we specify the Queen locations doesn’t matter in the 8 Queens Puzzle. When we only had two strategies, Crossover and Mutation, it was extremely difficult to introduce permutations of a set of locations into the parent pool. The addition of the Swap strategy, whose job is to introduce permutations, can cause the parent pool to become full of permutations of one solution, thus loosing genetic diversity. The lack of genetic diversity in the parents can cause the solver to get stuck in an infinite loop while trying to create the children of the next generation and finding no new patterns in this line:

foreach (var child in children.Where(x => uniqueIndividuals.Add(x.Genes)))

we draw forever from the children IEnumerable and count on the source to eventually stop feeding us individuals. It only accepts the ones that aren’t in the uniqueIndividuals set. The problem is it distinguishes between ABC, CBA, and BAC but those should be treated the same in the 8 Queens Puzzle. The solution is to convert the genes to a canonical form before checking for their presence in the uniqueIndividuals set.

foreach (var child in children.Where(x => uniqueIndividuals.Add(GetCanonicalGenes(x.Genes))))

Which we’ll implement as a property whose value can be replaced as necessary

public Func<string, string> GetCanonicalGenes { private get; set; }

and initialize in the constructor to its current behavior

            GetCanonicalGenes = genes => genes;

Now in the EightQueensSolver we replace that implementation with one that converts permutations to a single form. We were already doing when returning the final result so let’s pull that out to a method:

            Console.WriteLine(result);
            return getFitness(result) == 0
                       ? GetCanonicalGenes(result)
                       : null;
        }

        public string GetCanonicalGenes(string genes)
        {
            return new String(genes.OrderBy(x => x).ToArray());
        }

and configure the genetic solver to use that method when solving the 8 Queens Puzzle.

            var geneticSolver = new GeneticSolver
                {
                    GetCanonicalGenes = GetCanonicalGenes
                };
            string result = geneticSolver.GetBest(GeneCount,
                                                        GeneSet,
                                                        getFitness,
                                                        displayCurrentBest);

When we run the EightQueensSolver now it will not become stuck in an infinite loop.

The Travelling Salesperson Problem needs a similar solution. In this case order does matter but the rotation of the order, which point comes first, does matter because our parent array can fill up with rotations and reversed rotations of comparatively few distinct parents, for example:

abcdef
bcdefa
cdefab
defabc
efabcd
fabcde
fedcba
edcbaf
dcbafe
cbafed
bafedc
afedcb

This substantially reduces the genetic diversity of our parents as we approach an optimal solution, causing the solver to search longer for improvements. We can eliminate the rotations and reverse rotations from the parent list while converting the gene sequence to a canonical one.

Now in RouteOptimizationSolver we can create a function that returns a canonical version of the gene sequence regardless of rotation and reversal. This function will work by finding the lowest alphabetic character in the gene sequence then finding all sub sequences of the genes that start with that character. Sort those and take the first one as the canonical version.

        public string GetCanonicalGenes(string input)
        {
            char first = input.OrderBy(x => x).First();
            string doubledForRotation = input + input;
            string forward = (-1).GenerateFrom(x => doubledForRotation.IndexOf(first, x + 1))
                .Skip(1)
                .TakeWhile(x => x < input.Length)
                .Select(x => doubledForRotation.Substring(x, input.Length))
                .OrderBy(x => x)
                .First();

            string reverseDoubledForRotation = new String(doubledForRotation.Reverse().ToArray());
            string backward = (-1).GenerateFrom(x => reverseDoubledForRotation.IndexOf(first, x + 1))
                .Skip(1)
                .TakeWhile(x => x < input.Length)
                .Select(x => reverseDoubledForRotation.Substring(x, input.Length))
                .OrderBy(x => x)
                .First();

            return String.Compare(forward, backward) == -1 ? forward : backward;

        }

the Reverse extension method is implemented as follows:

    public static class StringExtensions
    {
        public static string Reverse(this string s)
        {
            var charArray = s.ToCharArray();
            Array.Reverse(charArray);
            return new String(charArray);
        }
    }

proof that it works:

            var sequences = new[]
                {
                    "abcdef", "bcdefa", "cdefab",
                    "defabc", "efabcd", "fabcde",
                    "fedcba", "edcbaf", "dcbafe",
                    "cbafed", "bafedc", "afedcb",
                };
            foreach (string sequence in sequences)
            {
                Console.WriteLine(GetCanonicalGenes(sequence));
            }

output:

abcdef
abcdef
abcdef
abcdef
abcdef
abcdef
abcdef
abcdef
abcdef
abcdef
abcdef
abcdef

Now configure the RouteOptimizationSolver to call that function when we create the genetic solver.

            var geneticSolver = new GeneticSolver
                {
                    GetCanonicalGenes = GetCanonicalGenes
                };
            string result = geneticSolver.GetBest(
                GeneSet.Length,
                GeneSet,
                child => CalculateRouteLength(child, pointLookup),
                (generation, fitness, genes, strategy) => Display(generation, fitness, genes, strategy, stopwatch));

Last, we need to add the home location to the gene set. Otherwise we won’t be able to distinguish home-a..z-home from a..m-home-n-z..home. Start by adding a new symbol to the gene set and simplifying CalculateRouteLength

        private const string GeneSet = "abcdefghijklmnopqrstuvwxyz*";

        private static int CalculateRouteLength(
            IEnumerable<char> sequence,
            IDictionary<char, Point> pointLookup)
        {
            var points = sequence.Select(x => pointLookup[x]).ToList();
            int distinctCount = points.Distinct().Count();
            points.Add(points[0]);
            double routeLength = points
                .Select((x, i) => i == 0 ? 0 : GetDistance(x, points[i - 1]))
                .Sum();
            int fitness =
                1000 * (pointLookup.Count - distinctCount)
                + (int)Math.Floor(routeLength);
            return fitness;
        }

and manually add the home location in Solve.

        public string Solve()
        {
            int pointOffset = 0;
            var pointLookup = GetNPointsOnCircle(new Point(Radius, Radius), Radius, GeneSet.Length - 1)
                .ToDictionary(x => GeneSet[pointOffset++], x => x);
            pointLookup.Add('*', new Point(0, pointLookup['a'].Y));

Now when we run it

Expect optimal route abcdefghijklmnopqrstuvwxyz* to have fitness 122
...
generation  1955 fitness   220 *azydefgsrqpjhiklmnotuvwxcb  elapsed: 00:00:02.4077673 by Swap
generation  2177 fitness   214 *aycdefgsrqpjhiklmnotuvwxzb  elapsed: 00:00:02.6261045 by Swap
generation  2297 fitness   209 *azcdefgsrqmjhiklnoptuvwxyb  elapsed: 00:00:02.7445250 by Swap
generation  2382 fitness   208 *azcdefgsrqpjhiklmontuvwxyb  elapsed: 00:00:02.8262503 by Swap
generation  2415 fitness   204 *bazcdefgsrqpjhiklmnotuvwxy  elapsed: 00:00:02.8601091 by Swap
generation  2439 fitness   202 *yxwvutponlkihjmqrsgfedcbaz  elapsed: 00:00:02.8842753 by Swap
generation  2469 fitness   200 *bcdefgsrqpjhiklmontuvwxyaz  elapsed: 00:00:02.9145854 by Swap
generation  2661 fitness   190 *abcdefgsrqpjhiklmnotuvwxzy  elapsed: 00:00:03.1029689 by Swap
generation  2726 fitness   189 *abcdefgsrqpihjklmnotuvwxzy  elapsed: 00:00:03.1661209 by Swap
generation  2824 fitness   188 *abcdefgsrqphijklmnotuvwxzy  elapsed: 00:00:03.2591257 by Swap
generation  2975 fitness   181 *abcdefgsrqpjhiklmnotuvwxyz  elapsed: 00:00:03.4038675 by Swap
generation  2978 fitness   178 *abcdefgsrqphijklmnotuvwxyz  elapsed: 00:00:03.4078139 by Swap

We can see that the routes are approaching alphabetical order but it seems to reach a maximum that even Swap can’t resolve. Here’s an idea: we can modify the Crossover strategy to cross at least one gene instead of exactly one gene. This should increase its usefulness at the areas that are changing start to localize.

    public class CrossoverStrategy : AbstractStrategy, IGeneticStrategy
    {
        public Individual CreateChild(Individual parentA, Individual parentB, string geneSet)
        {
            int sourceStart = Random.Next(parentB.Genes.Length);
            int destinationStart = Random.Next(parentA.Genes.Length);
            int maxLength = Math.Min(parentA.Genes.Length - destinationStart, parentB.Genes.Length - sourceStart);
            int length = Random.Next(1, maxLength);

            var childGenes = parentA.Genes.ToCharArray();
            var parentBGenes = parentB.Genes.ToCharArray();
            Array.Copy(parentBGenes, sourceStart, childGenes, destinationStart, length);

            var child = new Individual
                {
                    Genes = new String(childGenes),
                    Strategy = this,
                };
            return child;
        }
    }

To facilitate Crossover working late in the run we need to genes to be in canonical order. We could canonicalize them in the strategy by that adds complexity to the strategy implementation. A better alternative is to canonicalize the child genes just after the child is created.

        private IEnumerable<Individual> GenerateChildren(
            IList<Individual> parents,
            IGeneticStrategy strategy,
            string geneSet)
        {
            int count = 0;
            while (count < parents.Count)
            {
                int parentAIndex = _random.Next(parents.Count);
                int parentBIndex = _random.Next(parents.Count);
                if (parentAIndex == parentBIndex)
                {
                    continue;
                }
                var parentA = parents[parentAIndex];
                var parentB = parents[parentBIndex];
                var child = strategy.CreateChild(parentA, parentB, geneSet);
                child.Genes = GetCanonicalGenes(child.Genes);
                yield return child;
                count++;
            }
        }

This means we no longer have to call it elsewhere.

        public string GetBest(int length,
                              string geneSet,
                              Func<string, int> getFitness,
                              Action<int, int, string, string> displayChild)
        {

		...
		
		                foreach (var child in children.Where(x => uniqueIndividuals.Add(x.Genes)))

Now when we run again we can see Crossover being used.

generation  1958 fitness   224 *abdefghicxwutsrkjlmnopqvyz  elapsed: 00:00:02.7610664 by Swap
generation  2070 fitness   223 *ihgfedcbazyxqpomlkjnrstuvw  elapsed: 00:00:02.8982293 by Swap
generation  2257 fitness   222 *abdefghicxwutsrjklmnopqvyz  elapsed: 00:00:03.1275442 by Swap
generation  2695 fitness   221 *abcdefghizxvutsrnjklmopqwy  elapsed: 00:00:03.6574046 by Swap
generation  2873 fitness   220 *abcdefghizxwutsrpjklmnoqvy  elapsed: 00:00:03.8744305 by Swap
generation  2925 fitness   218 *abcdefghiyxvutsrnlkjmopqwz  elapsed: 00:00:03.9367673 by Swap
generation  2932 fitness   211 *azytqponmlkjrsuvwxihgfedcb  elapsed: 00:00:03.9461746 by Crossover
generation  5514 fitness   209 *azyxqponmlkjrstuwvihgfedcb  elapsed: 00:00:06.8379846 by Swap
generation  6026 fitness   207 *abcdefghixwvutsjklmnopqryz  elapsed: 00:00:07.3791586 by Swap
generation  6054 fitness   200 *abcdefghiuqponmlkjrstvwxyz  elapsed: 00:00:07.4089853 by Crossover
generation  6219 fitness   191 *abcdefghirqponmlkjustvwxyz  elapsed: 00:00:07.5864057 by Swap
generation  6262 fitness   190 *abcdefghijqponmlkurstvwxyz  elapsed: 00:00:07.6336034 by Swap
generation  6396 fitness   185 *abcdefghijqponmlktrsuvwxyz  elapsed: 00:00:07.7772607 by Swap
generation  6438 fitness   173 *abcdefghijkponmlqtrsuvwxyz  elapsed: 00:00:07.8238502 by Swap
generation  6760 fitness   172 *abcdefghijkponmlqtsruvwxyz  elapsed: 00:00:08.1634438 by Swap

What is another strategy that would help us get the rest of the way there? What other patterns do we see in the gene sequences? The last generation has alphabetical runs, some in standard order and others in reverse order. Maybe a Reverse strategy would help the solver to get those runs together into a single sequence. Let’s add it.

    public class ReverseStrategy : AbstractStrategy, IGeneticStrategy
    {
        public Individual CreateChild(Individual parentA, Individual parentB, string geneSet)
        {
            int reversePointA = Random.Next(parentA.Genes.Length);
            int reversePointB = Random.Next(parentA.Genes.Length);
            if (reversePointA == reversePointB)
            {
                reversePointB = Random.Next(parentA.Genes.Length);
                if (reversePointA == reversePointB)
                {
                    return parentA;
                }
            }
            int min = Math.Min(reversePointA, reversePointB);
            int max = Math.Max(reversePointA, reversePointB);
            var childGenes = parentA.Genes.ToCharArray();
            for (int i = 0; i <= (max - min) / 2; i++)
            {
                int lowIndex = i + min;
                int highIndex = max - i;
                char temp = childGenes[lowIndex];
                childGenes[lowIndex] = childGenes[highIndex];
                childGenes[highIndex] = temp;
            }
            var child = new Individual
                {
                    Genes = new String(childGenes),
                    Strategy = this
                };
            return child;
        }
    }

And run it again.

Expect optimal route abcdefghijklmnopqrstuvwxyz to have fitness 122
...
generation   568 fitness   193 *cdghijklmnopqrutsvwxyfebaz  elapsed: 00:00:00.8854990 by Reverse
generation   677 fitness   187 *cfghijklmnopqrutsvwxydebaz  elapsed: 00:00:01.0251965 by Swap
generation   707 fitness   173 *yxwvsturqponmlkjihgfcdebaz  elapsed: 00:00:01.0640215 by Reverse
generation   807 fitness   171 *cfghijklmnopqrstuvwxydebaz  elapsed: 00:00:01.1898932 by Reverse
generation   875 fitness   157 *abedcfghijklmnopqrtusvwxyz  elapsed: 00:00:01.2729912 by Swap
generation  1033 fitness   155 *edabcfghijklmnopqrstuvwxyz  elapsed: 00:00:01.4652522 by Reverse
generation  1099 fitness   148 *adebcfghijklmnopqrstuvwxyz  elapsed: 00:00:01.5430023 by Reverse
generation  1420 fitness   138 *adcbefghijklmnopqrstuvwxyz  elapsed: 00:00:01.9245126 by Swap
generation  1643 fitness   130 *acbdefghijklmnopqrstuvwxyz  elapsed: 00:00:02.1771875 by Reverse
generation  2089 fitness   122 *abcdefghijklmnopqrstuvwxyz  elapsed: 00:00:02.6567226 by Reverse
generation  5512 fitness   121 *azyxwvutsrqponmlkjihgfedcb  elapsed: 00:00:05.9732778 by Reverse

Whoa, check that out. It not only solved the problem it also beat our expected best route home-[a-z]-home by discovering that the distance from b-home-a-z is shorter than the distance from b-a-home-z.

One thing we definitely need now is a stop condition so our solver won’t keep looping forever after it finds an optimal answer or plateaus. Let’s stop if we go a certain number of seconds without improvement and make this a configurable value.

        public GeneticSolver()
        {
            _strategies = (from t in Assembly.GetExecutingAssembly().GetTypes()
                           where t.GetInterfaces().Contains(typeof(IGeneticStrategy))
                           where t.GetConstructor(Type.EmptyTypes) != null
                           select Activator.CreateInstance(t) as IGeneticStrategy).ToArray();

            GetCanonicalGenes = genes => genes;
            MaxSecondsWithoutImprovement = 20;
        }

		public double MaxSecondsWithoutImprovement { get; set; }
        public string GetBest(int length,
                              string geneSet,
                              Func<string, int> getFitness,
                              Action<int, int, string, string> displayChild)
        {
            _randomStrategy = new RandomStrategy(length);

            int maxIndividualsInPool = geneSet.Length * 3;
            int generationCount = 1;
            var uniqueIndividuals = new HashSet<string>();
            var parents = GenerateParents(geneSet)
                .Where(x => uniqueIndividuals.Add(x.Genes))
                .Take(maxIndividualsInPool)
                .ToList();
            foreach (var individual in parents)
            {
                individual.Fitness = getFitness(individual.Genes);
            }
            parents = parents.OrderBy(x => x.Fitness).ToList();

            var worstParent = parents.Last();
            displayChild(generationCount,
                         worstParent.Fitness,
                         worstParent.Genes,
                         worstParent.Strategy.Description);

            int worstParentFitness = worstParent.Fitness;

            IGeneticStrategy strategy = _randomStrategy;
            var children = GenerateChildren(parents, strategy, geneSet);
            var stopwatch = new Stopwatch();
            stopwatch.Start();
            
            do
            {
                var improved = new List<Individual>();
                foreach (var child in children.Where(x => uniqueIndividuals.Add(x.Genes)))
                {
                    child.Fitness = getFitness(child.Genes);
                    if (worstParentFitness >= child.Fitness)
                    {
                        improved.Add(child);
                        if (worstParentFitness > child.Fitness)
                        {
                            displayChild(generationCount,
                                         child.Fitness,
                                         child.Genes,
                                         child.Strategy.Description);
                            worstParentFitness = child.Fitness;
                        }
                    }
                }
                generationCount++;
                if (improved.Any())
                {
                    parents = parents
                        .Concat(improved)
                        .OrderBy(x => x.Fitness)
                        .Take(maxIndividualsInPool)
                        .ToList();
                    children = GenerateChildren(parents, strategy, geneSet);
                    stopwatch.Reset();
                    stopwatch.Start();
                }
                else
                {
                    var previousStrategy = strategy;
                    strategy = _strategies
                        .Where(x => !ReferenceEquals(x, previousStrategy))
                        .Shuffle()
                        .First();
                    children = GenerateChildren(parents, strategy, geneSet);
                }
            } while (parents[0].Fitness > 0 && 
			         stopwatch.Elapsed.TotalSeconds <= MaxSecondsWithoutImprovement);
            return parents[0].Genes;
        }

That will make it stop.

Now, let’s take a look at the output from the run above, with the way GetBest() works in mind, and see if any other opportunities for improvement come to mind. Let’s start with the generation number column. It looks like we rarely get multiple improvements in a single generation. This is happening because we’re potentially dramatically raising the bar to parenthood whenever we find a child with a better fitness. Here’s our current code:

                    if (worstParentFitness >= child.Fitness)
                    {
                        improved.Add(child);
                        if (worstParentFitness > child.Fitness)
                        {
                            displayChild(generationCount,
                                         child.Fitness,
                                         child.Genes,
                                         child.Strategy.Description);
                            worstParentFitness = child.Fitness;
                        }
                    }

What happens if our parents have fitnesses [4, 5, 6, 7] and the children have fitnesses [1, 2, 3, 4]? worstParentFitness is initialized to 7. Then the first child changes it to 1. All of the remaining children have better fitness than the previous worstParentFitness but they will not be added to the improved list because our code moved the bar too quickly. This is a huge waste of genetic potential. Let’s fix it. We’ll start by adding all new children to the improved list and renaming it to potentialParents.

                var potentialParents = new List<Individual>();
                foreach (var child in children.Where(x => uniqueIndividuals.Add(x.Genes)))
                {
                    child.Fitness = getFitness(child.Genes);
                    potentialParents.Add(child);
                    if (worstParentFitness > child.Fitness)
                    {
                        displayChild(generationCount,
                                     child.Fitness,
                                     child.Genes,
                                     child.Strategy.Description);
                        worstParentFitness = child.Fitness;
                    }
                }
                generationCount++;
                if (potentialParents.Any())
                {
                    parents = parents
                        .Concat(potentialParents)
                        .OrderBy(x => x.Fitness)
                        .Take(maxIndividualsInPool)
                        .ToList();
                    children = GenerateChildren(parents, strategy, geneSet);
                    stopwatch.Reset();
                    stopwatch.Start();
                }

Then we’ll rename worstParentFitness to bestParentFitness and initialize it appropriately.

            var bestParent = parents.First();
            displayChild(generationCount,
                         bestParent.Fitness,
                         bestParent.Genes,
                         bestParent.Strategy.Description);

            int bestParentFitness = bestParent.Fitness;

            IGeneticStrategy strategy = _randomStrategy;
            var children = GenerateChildren(parents, strategy, geneSet);
            var stopwatch = new Stopwatch();
            stopwatch.Start();

            do
            {
                var potentialParents = new List<Individual>();
                foreach (var child in children.Where(x => uniqueIndividuals.Add(x.Genes)))
                {
                    child.Fitness = getFitness(child.Genes);
                    potentialParents.Add(child);
                    if (bestParentFitness > child.Fitness)
                    {
                        displayChild(generationCount,
                                     child.Fitness,
                                     child.Genes,
                                     child.Strategy.Description);
                        bestParentFitness = child.Fitness;
                    }
                }

Next we need a way to track whether we improved the worst parent fitness this generation. We also want to rebuild the parent list either way, so move it up.

                var worstParent = parents.Last();
                parents = parents
                    .Concat(potentialParents)
                    .OrderBy(x => x.Fitness)
                    .Take(maxIndividualsInPool)
                    .ToList();
                if (!ReferenceEquals(worstParent, parents.Last()))
                {
                    children = GenerateChildren(parents, strategy, geneSet);
                    stopwatch.Reset();
                    stopwatch.Start();
                }
                else
                {
                    var previousStrategy = strategy;
                    strategy = _strategies
                        .Where(x => !ReferenceEquals(x, previousStrategy))
                        .Shuffle()
                        .First();
                    children = GenerateChildren(parents, strategy, geneSet);
                }

Now run again to see if it has improved.

Expect optimal route abcdefghijklmnopqrstuvwxyz* to have fitness 122
generation     1 fitness  5660 *dyomgvhsqwcpkyyzherntjifzj  elapsed: 00:00:00.0554751 by Random
generation    15 fitness  5656 *gryuobytmmzkcfwlnrdijevqrm  elapsed: 00:00:00.0866113 by Random
generation    20 fitness  5590 *dmekaecdgxeldzhbfjqsyertuv  elapsed: 00:00:00.0938524 by Random
generation    24 fitness  4756 aegwhmozrxorcipgkbnvzsdulft  elapsed: 00:00:00.1073711 by Mutation
generation    25 fitness  4754 aegwumozjxorcipgkbnvzsdulft  elapsed: 00:00:00.1082410 by Mutation
generation    26 fitness  4702 *vfhcslalfeqzjwztuypblgiokx  elapsed: 00:00:00.1105010 by Mutation
generation    27 fitness  4641 adcxqkkufjnfosrpiblzetcgmkv  elapsed: 00:00:00.1115985 by Mutation
generation    29 fitness  4623 adcxqkkuujnfosrpiblzetcgmkv  elapsed: 00:00:00.1152134 by Mutation
generation    29 fitness  4585 *augzgysrrjhdlbfxgimwxvonkq  elapsed: 00:00:00.1156230 by Mutation
generation    29 fitness  3663 *vfhcslalfeqnjwztuypblgiokx  elapsed: 00:00:00.1163844 by Mutation
generation    31 fitness  3648 *gsyuobgtmpzfcfwlnkdijevqrm  elapsed: 00:00:00.1186918 by Mutation
generation    32 fitness  3644 *gsyxobgtmpzfcfwlnkdijevqrm  elapsed: 00:00:00.1213294 by Mutation
generation    33 fitness  3637 *gsyuobgtmpzfcfwlnkdijevqrp  elapsed: 00:00:00.1225585 by Mutation
generation    35 fitness  3622 *gsyuobgtmpzfcvwlnkdijenqrm  elapsed: 00:00:00.1253300 by Mutation
generation    35 fitness  3620 *gsyxobxtmpzfcfwlnkdijevqrm  elapsed: 00:00:00.1264864 by Mutation
generation    37 fitness  2670 *hcsladfeqzjwytuypbngiokx*v  elapsed: 00:00:00.1298927 by Mutation
generation    38 fitness  2639 *vfhcsralfeqnjwztuypblgiokx  elapsed: 00:00:00.1317705 by Mutation
generation    40 fitness  2633 *vmhcslagfeqnjwztuypbygiokx  elapsed: 00:00:00.1338222 by Mutation
generation    41 fitness  2609 *gsyxobxtupzfcswlnkdijevqrm  elapsed: 00:00:00.1354747 by Mutation
generation    43 fitness  1642 *hcsladfeqzjwyturpbngiokx*v  elapsed: 00:00:00.1395405 by Mutation
generation    51 fitness  1615 *gsyhobxtqpzfcuwlnkdijevqrm  elapsed: 00:00:00.1515294 by Mutation
generation    55 fitness  1570 *ausyxobgtmpzbcfwlikdhjevqr  elapsed: 00:00:00.1583159 by Mutation
generation    58 fitness  1553 *arqhejidknlxvcffpmtzbouysg  elapsed: 00:00:00.1628166 by Mutation
generation    63 fitness  1527 *agsyxonttmpzbcfwlikdhjevqr  elapsed: 00:00:00.1706478 by Mutation
generation    69 fitness   671 *gsyhobxtqpzfcuwlnkdiaevmrj  elapsed: 00:00:00.1799533 by Mutation
generation    69 fitness   656 *gsyhobxtqpzfcjwlnkdiaevurm  elapsed: 00:00:00.1802409 by Mutation
generation    79 fitness   633 *hsmladfeqzjwyturpbngiokxcv  elapsed: 00:00:00.1957913 by Mutation
generation    81 fitness   582 *cfzpqtxbohysgjmruveaidknlw  elapsed: 00:00:00.1991853 by Mutation
generation    95 fitness   537 *cfzpqtxsohybgjmruveaidknlw  elapsed: 00:00:00.2210440 by Swap
generation   100 fitness   533 *vcxkongibprutywahqefdjlmsz  elapsed: 00:00:00.2281731 by Swap
generation   101 fitness   531 *ayhdejivknltucfzpqwxbomrsg  elapsed: 00:00:00.2299265 by Swap
generation   102 fitness   529 *vcxmongibprutywahqefdjlksz  elapsed: 00:00:00.2320614 by Swap
generation   102 fitness   523 *cfsqptxbohyzdjmruveaigknlw  elapsed: 00:00:00.2331642 by Swap
generation   104 fitness   499 *bzfoptycvxlnkgijehqrmadswu  elapsed: 00:00:00.2362218 by Swap
generation   104 fitness   496 *cfzpqtxsoaybgjmruvehidknlw  elapsed: 00:00:00.2366325 by Swap
generation   104 fitness   453 *ayhdejigknltucfzpqwxbomrsv  elapsed: 00:00:00.2373951 by Swap
generation   108 fitness   444 *ayhdejigknltucfzpqwxbmorsv  elapsed: 00:00:00.2421276 by Swap
generation   110 fitness   441 *axqefdjlmhswvprokignbczuty  elapsed: 00:00:00.2451451 by Swap
generation   110 fitness   429 *acxvoignqprutwyjzsefhklmbd  elapsed: 00:00:00.2485260 by Swap
generation   111 fitness   415 *dcjkongiqprutywahfebvxlmsz  elapsed: 00:00:00.2492897 by Swap
generation   114 fitness   404 *acxvzignqprutwyjosefhklmbd  elapsed: 00:00:00.2539517 by Swap
generation   114 fitness   397 *acxvoignqprutwyszjefhklmbd  elapsed: 00:00:00.2546727 by Swap
generation   114 fitness   396 *ayhdegijknlzcbftpqwxuomrsv  elapsed: 00:00:00.2551986 by Swap
generation   115 fitness   381 *anmoxvbefhzwyturpqsgilkjcd  elapsed: 00:00:00.2564785 by Swap
generation   119 fitness   376 *acxvoignqprutsywzjhfeklmbd  elapsed: 00:00:00.2620751 by Swap
generation   124 fitness   359 *acxvoignqprutswyzjhfeklmbd  elapsed: 00:00:00.2709926 by Swap
generation   129 fitness   354 *acbdmlkhefgjzywturpqnsiovx  elapsed: 00:00:00.2784593 by Swap
generation   133 fitness   347 *acxvoimnqprutswyzjhfeklgbd  elapsed: 00:00:00.2847065 by Swap
generation   136 fitness   345 *acyzokjnqprutwxsvgefhilmbd  elapsed: 00:00:00.2890470 by Swap
generation   138 fitness   338 *acxzoijnpqrstwyuvgefhklmbd  elapsed: 00:00:00.2926882 by Swap
generation   140 fitness   325 *acyzqkjnopruvwxstgefhilmbd  elapsed: 00:00:00.2960075 by Swap
generation   142 fitness   323 *acyzjkonqpruxwtsvgefhilmbd  elapsed: 00:00:00.2990112 by Swap
generation   143 fitness   320 *acbdmljihfgexywturpnqskovz  elapsed: 00:00:00.3021504 by Swap
generation   144 fitness   299 *acbdkljihfgexywturpnqsmovz  elapsed: 00:00:00.3030250 by Swap
generation   153 fitness   297 *acbdkljiefghxywtuspnqrmovz  elapsed: 00:00:00.3173747 by Swap
generation   160 fitness   296 *acbdkljiefghxywtusnpqrmovz  elapsed: 00:00:00.3281653 by Swap
generation   161 fitness   293 *acbdmljihfgexywutsknqrpovz  elapsed: 00:00:00.3297956 by Swap
generation   171 fitness   289 *acbdmljihfgexywtusqrpnkovz  elapsed: 00:00:00.3448798 by Swap
generation   176 fitness   286 *acbdmljihfgexwvtusknqrpoyz  elapsed: 00:00:00.3530632 by Swap
generation   176 fitness   285 *fhjlkegdbcazxvqimnoprstuwy  elapsed: 00:00:00.3535371 by Swap
generation   180 fitness   284 *acbdgekljhfzywutsrponmiqvx  elapsed: 00:00:00.3592053 by Swap
generation   180 fitness   281 *acbdmljihfgexywutsqrpnkovz  elapsed: 00:00:00.3599044 by Swap
generation   180 fitness   280 *acbdkljihgfeyxwturpnqsmovz  elapsed: 00:00:00.3612232 by Swap
generation   181 fitness   273 *acbdkljihfgexywutsqnmrpovz  elapsed: 00:00:00.3626863 by Swap
generation   181 fitness   269 *axwoprqnmsuvtyzefghijlkdbc  elapsed: 00:00:00.3634520 by Swap
generation   188 fitness   267 *acbdkljihfgexwvtusmnqropyz  elapsed: 00:00:00.3733029 by Swap
generation   189 fitness   264 *acbdkljihfgeyxwutsqnmrpovz  elapsed: 00:00:00.3754586 by Swap
generation   195 fitness   258 *acbdkljihfgeyxwutsonmrpqvz  elapsed: 00:00:00.3844623 by Swap
generation   202 fitness   257 *abcdkljihfgexwvtusmnqrpoyz  elapsed: 00:00:00.3978555 by Swap
generation   205 fitness   256 *acbdkljihgfeyxwutrqsmnpovz  elapsed: 00:00:00.4019836 by Swap
generation   205 fitness   250 *acbdkljihfgeyxwutsonmqprvz  elapsed: 00:00:00.4032208 by Swap
generation   206 fitness   249 *abcdkljihfgexwvutsmnqrpoyz  elapsed: 00:00:00.4048456 by Swap
generation   213 fitness   248 *acdbmkjihgfeyxvutsrpqonlwz  elapsed: 00:00:00.4152894 by Swap
generation   222 fitness   245 *abcdkljihfgexwvutsmnqopryz  elapsed: 00:00:00.4283747 by Swap
generation   226 fitness   240 *cabdkljihgfexwvtusrqmnopyz  elapsed: 00:00:00.4365650 by Swap
generation   233 fitness   239 *abcdkljihfgexwvutsqrpmonyz  elapsed: 00:00:00.4473649 by Swap
generation   252 fitness   232 *cabdefghkljixwvtusrqmnopyz  elapsed: 00:00:00.4760236 by Reverse
generation   253 fitness   231 *acdbmkjihgfelnoqprstuvxzwy  elapsed: 00:00:00.4784075 by Reverse
generation   257 fitness   212 *abcdiljkhgfeponmqrsutvwxyz  elapsed: 00:00:00.4844138 by Reverse
generation   262 fitness   199 *aedcywvutsrpqonmlkjihgfbxz  elapsed: 00:00:00.4928000 by Reverse
generation   274 fitness   191 *aedcyxbfghijklmnopqrstuvwz  elapsed: 00:00:00.5169754 by Reverse
generation   281 fitness   186 *abcdiefghkjlponmqrstuvwxyz  elapsed: 00:00:00.5274966 by Reverse
generation   283 fitness   172 *aedcyxwvutsrqponmlkjihgfbz  elapsed: 00:00:00.5299209 by Reverse
generation   289 fitness   148 *aedcbfghijklmnopqrstuvwxyz  elapsed: 00:00:00.5384234 by Reverse
generation   321 fitness   146 *adcefghijklmonpqrstuvwxyzb  elapsed: 00:00:00.5865374 by Reverse
generation   328 fitness   145 *badcefghijklmonpqrstuvwxyz  elapsed: 00:00:00.5968006 by Reverse
generation   342 fitness   129 *abcdefghijklmnopqrstuwvxyz  elapsed: 00:00:00.6179554 by Reverse
generation   369 fitness   122 *abcdefghijklmnopqrstuvwxyz  elapsed: 00:00:00.6564651 by Reverse
generation   403 fitness   121 *azyxwvutsrqponmlkjihgfedcb  elapsed: 00:00:00.7050802 by Reverse
cb*azyxwvutsrqponmlkjihgfed

The speed has improved but there is an interesting pattern of strategies in the output above. We’re now using the same strategy as long as it generated any improvement in the pool. This can result in over reliance of a particular strategy when another might do better.

Our current code for picking a new strategy in GetBest() is as follows:

                    strategy = _strategies
                        .Where(x => !ReferenceEquals(x, previousStrategy))
                        .Shuffle()
                        .First();

When we think about this problem, it is clear that Crossover and Mutation will be useless once the gene sequence contains only one copy of each gene in the gene set. This gives us a 50% chance (3/4*1/2 when previousStrategy was Swap, Reverse or Shift + 1/4*1/2 when previousStrategy was Crossover or Mutation) of picking a useless strategy once the duplicates have been removed. If we change this to use the historically successful strategies, with at least one copy of all 5 strategies included by default, the likelihood of a strategy being chosen will rise or fall with its success. Of course, to do that we’d have to track the strategy usage somehow. An easy way to do that is to keep a copy of the parent of each child.

    public class Individual
    {
        public int Fitness;
        public string Genes;
        public Individual Parent;
        public IGeneticStrategy Strategy;
    }

then set it when we create the child with one of the strategies, e.g.

            var child = new Individual
            {
                Genes = new String(childGenes),
                Strategy = this,
                Parent = parentA
            };

except those created by RandomStrategy of course. Next, in GetBest we’ll get the strategies from the ancestors of the new best parent whenever we find one.

        private static IEnumerable<Individual> GetAncestors(Individual bestIndividual)
        {
            while (bestIndividual != null)
            {
                yield return bestIndividual;
                bestIndividual = bestIndividual.Parent;
            }
        }

And scale their frequences to 3x the number of Strategies in the system.

        private readonly int _numberOfAncestorStrategiesToUse;

        public GeneticSolver()
        {
            _strategies = (from t in Assembly.GetExecutingAssembly().GetTypes()
                           where t.GetInterfaces().Contains(typeof(IGeneticStrategy))
                           where t.GetConstructor(Type.EmptyTypes) != null
                           select Activator.CreateInstance(t) as IGeneticStrategy).ToArray();

            _numberOfAncestorStrategiesToUse = 3 * _strategies.Length;

to that we’ll add one copy of each strategy.

        private List<IGeneticStrategy> GetStrategyPool(IEnumerable<Individual> ancestors)
        {
            var ancestorStrategies = ancestors
                .Select(x => x.Strategy)
                .Where(x => x != _randomStrategy)
                .ToList();
            var scaleTo = Math.Min(ancestorStrategies.Count, _numberOfAncestorStrategiesToUse);
            var successfulStrategies = ancestorStrategies
                .GroupBy(x => x)
                .SelectMany(x => Enumerable.Repeat(x.Key, (int)(scaleTo * x.Count() * 1.0 / ancestorStrategies.Count)));

            var strategies = _strategies.Concat(successfulStrategies).ToList();
            return strategies;
        }

This let’s strategies that appear often in a winner’s ancestry be chosen often.

            var children = GenerateChildren(parents, new[]{_randomStrategy}, geneSet);
            var strategies = _strategies.ToList();
            var stopwatch = new Stopwatch();
            stopwatch.Start();

            do
            {
                var potentialParents = new List<Individual>();
                foreach (var child in children.Where(x => uniqueIndividuals.Add(GetCanonicalGenes(x.Genes))))
                {
                    child.Fitness = getFitness(child.Genes);
                    potentialParents.Add(child);
                    if (bestParent.Fitness > child.Fitness)
                    {
                        bestParent = child;
                        var ancestors = GetAncestors(bestParent).ToList();
                        strategies = GetStrategyPool(ancestors);
                        if (ancestors.Count > maxIndividualsInPool)
                        {
                            maxIndividualsInPool = ancestors.Count;
                        }
                        displayChild(generationCount,
                                     bestParent.Fitness,
                                     bestParent.Genes,
                                     bestParent.Strategy.Description);
                        stopwatch.Reset();
                        stopwatch.Start();
                    }
                }
                generationCount++;
                parents = parents
                    .Concat(potentialParents)
                    .OrderBy(x => x.Fitness)
                    .Take(maxIndividualsInPool)
                    .ToList();
                children = GenerateChildren(parents, strategies, geneSet);
            } while (parents[0].Fitness > 0 &&
                     stopwatch.Elapsed.TotalSeconds <= MaxSecondsWithoutImprovement);

Next we’ll move the responsibility for stopping pulling from GenerateChildren to the caller

            var children = GenerateChildren(parents, _randomStrategy, geneSet)
                .Where(x => uniqueIndividuals.Add(x.Genes));
            var strategies = _strategies.ToList();
            var stopwatch = new Stopwatch();
            stopwatch.Start();

            do
            {
                var potentialParents = new List<Individual>();
                foreach (var child in children
                    .TakeWhile(x => potentialParents.Count < maxIndividualsInPool))
                {
...

                 children = GenerateChildren(parents, strategies, geneSet)
                    .Where(x => uniqueIndividuals.Add(x.Genes));
            } while (parents[0].Fitness > 0 &&
                     stopwatch.Elapsed.TotalSeconds <= MaxSecondsWithoutImprovement);

and update GenerateChildren to yield return items forever instead of returning a specific number.

        private IEnumerable<Individual> GenerateChildren(
            IList<Individual> parents,
            IList<IGeneticStrategy> strategyPool,
            string geneSet)
        {
            int parentBIndex = _random.Next(parents.Count);
            var parentB = parents[parentBIndex];
            while (true)
            {
                int parentAIndex = _random.Next(parents.Count);
                var parentA = parents[parentAIndex];
                var strategy = strategyPool[_random.Next(strategyPool.Count)];
                var child = strategy.CreateChild(parentA, parentB, geneSet);
                child.Genes = GetCanonicalGenes(child.Genes);
                yield return child;
                parentB = parentA;
            }
        }

Now run again

...
generation   151 fitness   187 *bdzayxwvutsrqponlmjkighfec  elapsed: 00:00:00.3273716 by Reverse
generation   164 fitness   186 *bdzayxwvutsrqponlmkjgihfec  elapsed: 00:00:00.3487084 by Swap
generation   168 fitness   178 *bczayxwvutsrqponlmkjgihfed  elapsed: 00:00:00.3546261 by Swap
generation   172 fitness   162 *bcdefhigjkmlnopqrstuvwxyaz  elapsed: 00:00:00.3613240 by Swap
generation   189 fitness   154 *azyxwuvtsrqponlmkjgihfedcb  elapsed: 00:00:00.3908327 by Swap
generation   240 fitness   147 *abdcefghijkmlnpoqrstuvwxyz  elapsed: 00:00:00.4744304 by Swap
generation   256 fitness   145 *azyxwuvtsrqponmlkjgihfedcb  elapsed: 00:00:00.4999965 by Swap
generation   257 fitness   137 *azyxwuvtsrqponmlkjighfedcb  elapsed: 00:00:00.5015466 by Reverse
generation   283 fitness   129 *azyxwvutsrqponmlkjighfedcb  elapsed: 00:00:00.5427432 by Swap
generation   368 fitness   128 *bazyxwvutsrqponmlkjihgfedc  elapsed: 00:00:00.6791415 by Reverse
generation   380 fitness   121 *azyxwvutsrqponmlkjihgfedcb  elapsed: 00:00:00.6989716 by Reverse

Great! It still finds the optimal solution in about the same amount of time and does so with better diversity in the strategies used.

Now it turns out there are a bunch of standard Travelling Salesman Problems in TSPLIB for which optimal answers are known. Let’s pick a symmetric one about twice the size of the one we just solved (eil51) and see how our solver performs.

We’ll have to make a few changes to the RouteOptimizationSolver but it would be nice to keep our circle route around for benchmarking too so let’s extract the circle specific parts to a new class.

    public interface IRouteSource
    {
        string GeneSet { get; }
        string OptimalRoute { get; }
        double GetDistance(char pointA, char pointB);
    }
	
	public class CircleRouteSource : IRouteSource
    {
        private const int Radius = 20;
        private readonly Dictionary<char, Point> _pointLookup;

        public CircleRouteSource()
        {
            int pointOffset = 0;
            _pointLookup = GetNPointsOnCircle(new Point(Radius, Radius), Radius, GeneSet.Length - 1)
                .ToDictionary(x => GeneSet[pointOffset++], x => x);
            _pointLookup.Add('*', new Point(0, _pointLookup['a'].Y));
        }

        public string GeneSet
        {
            get { return "abcdefghijklmnopqrstuvwxyz*"; }
        }

        public double GetDistance(char pointA, char pointB)
        {
            return GetDistance(_pointLookup[pointA], _pointLookup[pointB]);
        }

        public string OptimalRoute
        {
            get { return "*azyxwvutsrqponmlkjihgfedcb"; }
        }

        private static double GetDistance(Point pointA, Point pointB)
        {
            int sideA = pointA.X - pointB.X;
            int sideB = pointA.Y - pointB.Y;
            double sideC = Math.Sqrt(sideA * sideA + sideB * sideB);
            return sideC;
        }

        private static IEnumerable<Point> GetNPointsOnCircle(Point center, int radius, int n)
        {
            double alpha = Math.PI * 2 / (n + 1);
            var points = new List<Point>(n);
            int maxArrayIndex = 2 * radius - 1;
            Func<int, int> forceInBounds = x => Math.Min(Math.Max(0, x), maxArrayIndex);
            int i = n / 2;
            while (i < 3 * n / 2)
            {
                double theta = alpha * i++;
                int x = (int)(Math.Cos(theta) * radius);
                int y = (int)(Math.Sin(theta) * radius);
                var pointOnCircle = new Point(forceInBounds(center.X + x), forceInBounds(center.Y + y));
                points.Add(pointOnCircle);
            }

            return points;
        }
    }

Then update the RouteOptimizationSolver to use an IRouteSource

        private static int CalculateRouteLength(
            string sequence,
            IRouteSource routeSource)
        {
            var points = sequence.ToList();
            int distinctCount = points.Distinct().Count();
            points.Add(points[0]);
            double routeLength = points
                .Select((x, i) => i == 0 ? 0 : routeSource.GetDistance(x, points[i - 1]))
                .Sum();
            int fitness =
                1000 * (sequence.Length - distinctCount)
                + (int)Math.Floor(routeLength);
            return fitness;
        }
		
        public string Solve(IRouteSource routeSource, int maxSecondsWithoutImprovement)
        {
            if (routeSource.OptimalRoute != null)
            {
                Console.WriteLine("Expect optimal route " + routeSource.OptimalRoute
                                  + " to have fitness " + CalculateRouteLength(routeSource.OptimalRoute, routeSource));
            }

            var stopwatch = new Stopwatch();
            stopwatch.Start();

            var geneticSolver = new GeneticSolver
                {
                    GetCanonicalGenes = GetCanonicalGenes,
                    MaxSecondsWithoutImprovement = maxSecondsWithoutImprovement
                };
            string result = geneticSolver.GetBest(
                routeSource.GeneSet.Length,
                routeSource.GeneSet,
                child => CalculateRouteLength(child, routeSource),
                (generation, fitness, genes, strategy) => Display(generation, fitness, genes, strategy, stopwatch));
            Console.WriteLine(GetCanonicalGenes(result));

            return GetCanonicalGenes(result);
        }		

Next we can implement the TSPLIB specific methods in a new IRouteSource called TsplibRouteSource. We’ll start with the way we calculate the distance between two points.

    public class TsplibRouteSource : IRouteSource
    {
        public string GeneSet
        {
            get { return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890"; }
        }
        public string OptimalRoute
        {
            get { return null; }
        }
        public double GetDistance(char pointA, char pointB)
        {
            return GetDistance(_pointLookup[pointA], _pointLookup[pointB]);
        }

        private static double GetDistance(Point pointA, Point pointB)
        {
            int sideA = pointA.X - pointB.X;
            int sideB = pointA.Y - pointB.Y;
            double sideC = Math.Sqrt(sideA * sideA + sideB * sideB);
            return (int)(.5 + sideC);
        }
    }

Next we need to read the TSPLIB problem file format instead of generating points on a circle. The problem files have some number of header lines, then a data start marker line “NODE_COORD_SECTION”, then numbered points on separate lines, and finally a line containing “EOF”. Example:

NAME : foo
COMMENT : bar
TYPE : TSP
DIMENSION : 51
EDGE_WEIGHT_TYPE : EUC_2D
NODE_COORD_SECTION
1 8 5
2 7 60
... more lines
51 33 25
EOF

We can easily convert that to a point lookup.

        private const string GenericGeneSet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";
        private readonly Dictionary<char, Point> _pointLookup;

        public TsplibRouteSource(string filepath)
        {
            int pointCount = 0;
            _pointLookup = GetTsplibFilePoints(filepath)
                .ToDictionary(x => GenericGeneSet[pointCount++], x => x);
        }

        public string GeneSet
        {
            get { return new String(_pointLookup.Keys.ToArray()); }
        }
		
        private static IEnumerable<Point> GetTsplibFilePoints(string filepath)
        {
            var separator = new[] { ' ' };
            var points = File.ReadAllLines(filepath + ".tsp")
                .SkipWhile(x => x != "NODE_COORD_SECTION")
                .Skip(1)
                .TakeWhile(x => x != "EOF")
                .Select(x => x.Split(separator).Select(y => Int32.Parse(y)).ToArray())
                .Select(x => new Point(x[1], x[2]))
                .ToList();
            return points;
        }

Finally change the Program to use a TsplibRouteSource instead of a CircleRouteSource

        public static void Main()
        {
            new RouteOptimizationSolver().Solve(new TsplibRouteSource(@"Data\eil51"), 3);
        }

and we’re ready to run it.

...
generation   933 fitness   474 AfrUldqKeLiWkFaVwxQgzhEBvcIJtbCpuXHDMjGSoRPNsOmynTY  elapsed: 00:00:04.3969890 by Reverse
generation   938 fitness   473 AahEBvbcJItCuHXpiLDMjGSoRPsNOmynrdqKUleWkFTYfwxQgzV  elapsed: 00:00:04.4189983 by Reverse
generation   985 fitness   466 AaEBvbtJIcCuHXpiWDMjGSoRPsNOmynrdqKUleLkFTYfwxQgzhV  elapsed: 00:00:04.6328910 by Swap
generation   999 fitness   465 AfrdqKekFaVwxQgzhEBvcIJtbpCuHXLWiDMjGSoRPNsOmynUlTY  elapsed: 00:00:04.6960111 by Reverse
generation  1002 fitness   462 AaEBvbtIJcCuHXpiWDMjGSoRPsNOmynrdqKUleLkFTYfwxQgzhV  elapsed: 00:00:04.7086736 by Reverse
generation  1049 fitness   451 AfnymOsNPRoSGjMDHXupCbtJIcvBEhzgQxwVaFkWiLeKqdrUlTY  elapsed: 00:00:04.9245268 by Reverse
generation  1123 fitness   447 AfnymOsNPRoSGjMDHXpuCbtJIcvBEhzgQxwVaFkWiLeKqdrUlTY  elapsed: 00:00:05.2573138 by Reverse
generation  1238 fitness   445 AfnymOsNPRoSGjMDHXupCbtJIcvBEhzgQxwVaFkLiWeKqdrUlTY  elapsed: 00:00:05.7769130 by Swap
generation  1250 fitness   444 AfnymOsNPRoSGjMDHuXpCbtJIcvBEhzgQxwVaFkLiWeKqdrUlTY  elapsed: 00:00:05.8431349 by Reverse
generation  1275 fitness   442 AfnymOsNPRoSGjMDHXuCpbtJIcvBEhzgQxwVaFkLiWeKqdrUlTY  elapsed: 00:00:05.9562212 by Reverse
generation  1485 fitness   441 AfnymOsNPRoSGMjDHuXpbCtJIcvBEhzgQxwVaFkLiWeKqdrUlTY  elapsed: 00:00:06.9177573 by Swap
generation  1569 fitness   440 AfnymONsPRoSGMjDHXupCbtIJcvBEhzgQxwVaFkLiWeKqdrUlTY  elapsed: 00:00:07.3032016 by Reverse
generation  1625 fitness   436 AfnymONsPRoSGMjDHXpuCbtIJcvBEhzgQxwVaFkLiWeKqdrUlTY  elapsed: 00:00:07.5622508 by Reverse
generation  2046 fitness   433 AaFkLiDHXpuCbtIJcvBEhzgQxwVfnymONsPRoSGMjWeKqdrUlTY  elapsed: 00:00:09.5297327 by Reverse
AaFkLiDHXpuCbtIJcvBEhzgQxwVfnymONsPRoSGMjWeKqdrUlTY

The TSPLIB solution list says this problem has an optimal length of 426… so close. Here is an image containing the points we’ve been working with plotted.

eil51 points plotted

That is just mind boggling right?

So how do we improve this time? It is not easy to find patterns in the genes as they are now. Let’s assign the gene characters in the order of the optimal solution so it will be easier to pick out patterns. The optimal solution file has a layout similar to the problem file:

NAME : foo.opt.tour
COMMENT : ...
TYPE : TOUR
DIMENSION : 51
TOUR_SECTION
30
2
24
// .. more point ids
-1
EOF

We’ll read it when we read the problem file then use the ids to sort the Points before returning them.

        private static IEnumerable<Point> GetTsplibFilePoints(string filepath)
        {
            var separator = new[] { ' ' };
            var pointLookup = File.ReadAllLines(filepath + ".tsp")
                .SkipWhile(x => x != "NODE_COORD_SECTION")
                .Skip(1)
                .TakeWhile(x => x != "EOF")
                .Select(x => x.Split(separator).Select(y => Int32.Parse(y)).ToArray())
                .ToDictionary(x => x[0], x => new Point(x[1], x[2]));

            var optimalPath = File.ReadAllLines(filepath + ".opt.tour")
                .SkipWhile(x => x != "TOUR_SECTION")
                .Skip(1)
                .TakeWhile(x => x != "-1")
                .Select(x => Int32.Parse(x))
                .ToArray();

            return optimalPath.Select(x => pointLookup[x]);
        }
		
        public string OptimalRoute
        {
            get { return GeneSet; }
        }		

This time when we run it

Expect optimal route abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY to have fitness 426
generation     1 fitness 14588 BjwovHlvnecYUysWIiwSzqssKHoQbGDRzyhxuhESNWptPLaJFhl  elapsed: 00:00:00.0550563 by Random
generation     1 fitness 13709 AdcntKmIrRjLvlWPprnEFsWCeIOnEoYybzurfQGMXtygTJtNVJm  elapsed: 00:00:00.0611060 by Random
generation     4 fitness 13694 AdcntKmIrRvLjlWPprnXFsWCeIOnEoYybzurfQGMEtygTJtNVJm  elapsed: 00:00:00.0888278 by Swap
generation     4 fitness 13585 BjQovHlvnecYUysWIiwSzqssKHoQbGDRzfhxuhESNWptPLaJFhl  elapsed: 00:00:00.0903772 by Mutation
generation     5 fitness 12704 AdcntKmIrRjLvlWPprntFsWCeIOaEoYybzurfQGMXtygTJtNVJm  elapsed: 00:00:00.0921313 by Mutation
generation     6 fitness 12679 AdcntKmIrRjLvlWPprntFsWCeIOaEoYybUurfQGMXtygTJtNVJm  elapsed: 00:00:00.0964511 by Crossover
...
generation  1141 fitness   450 ABzyxwvutsrqponmlkjihgfedcbaYXWVONMKLJIHGFERQTUPSCD  elapsed: 00:00:05.2143423 by Reverse
generation  1203 fitness   449 ABzyxwvutsrqpmnolkjihgfedcbaYXWVONMKLJIHGFERQPUTSCD  elapsed: 00:00:05.4874540 by Swap
generation  1228 fitness   446 ABzyxwvutsqpromlnkjihgfedcbaYXWVONMLKJIHGFERQPUTSCD  elapsed: 00:00:05.5966821 by Swap
generation  1312 fitness   445 ABzyxwvutsrqponlmkjihgfedcbaYXWVONMLKJIHGFERQPUTSCD  elapsed: 00:00:05.9671800 by Reverse
generation  1382 fitness   438 ABzyxwvutsrqpomlnkjihgfedcbaYXWVONMKLJIHGFERQPUTSDC  elapsed: 00:00:06.2721440 by Reverse
generation  1618 fitness   435 ABzyxwvutsrqponmlkjihgfedcbaYXWVONMLKJIHGFERQPUTSDC  elapsed: 00:00:07.3276965 by Reverse
generation  1780 fitness   434 ABzyxwvutsrqpomnkljihgfedcbaYXWVUTQPONMLKJIHGFERSDC  elapsed: 00:00:08.0475115 by Swap
generation  1931 fitness   433 ABzyxwvutsrqpomlnkjihgfedcbaYXWVUTQPONMLKJIHGFERSDC  elapsed: 00:00:08.7351971 by Swap
generation  1972 fitness   429 ABzyxwvutsrqpomlnkjihgfedcbaYXWVUTSRQPONMKLJIHGFEDC  elapsed: 00:00:08.9238197 by Reverse
generation  2270 fitness   427 ABzyxwvutsrqpomlnkjihgfedcbaYXWVUTSRQPONMLKJIHGFEDC  elapsed: 00:00:10.3007169 by Reverse
generation  2402 fitness   426 ABzyxwvutsrqponmlkjihgfedcbaYXWVUTSRQPONMLKJIHGFEDC  elapsed: 00:00:10.9062857 by Reverse
ABzyxwvutsrqponmlkjihgfedcbaYXWVUTSRQPONMLKJIHGFEDC

it finds an optimal solution but only does so 5 times in 100 runs:

426 - 5 times
427 - 6 times
428 - 2 times
429 - 6 times
430 - 5 times
431 - 10 times
432 - 9 times
433 - 9 times
434 - 14 times
435 - 7 times
436 - 5 times
437 - 6 times
438 - 4 times
439 - 1 times
440 - 7 times
442 - 1 times
443 - 2 times
444 - 1 times

What could the problem be? Could it be that we need more time to go the final distance? Let’s increase the timeout to 60 seconds and benchmark again:

426 - 8 times
427 - 17 times
428 - 6 times
429 - 6 times
430 - 3 times
431 - 7 times
432 - 15 times
433 - 12 times
434 - 4 times
435 - 7 times
436 - 3 times
437 - 5 times
438 - 3 times
440 - 1 times
441 - 1 times
442 - 1 times
443 - 1 times

That almost doubled the number of optimal results but even including the pen-optimal results we only get 25 percent. Clearly we need to do more.

Let’s examine the last few generations of the optimal run above:

...
generation  1141 fitness   450 ABzyxwvutsrqponmlkjihgfedcbaYXWVONMKLJIHGFERQTUPSCD  elapsed: 00:00:05.2143423 by Reverse
generation  1203 fitness   449 ABzyxwvutsrqpmnolkjihgfedcbaYXWVONMKLJIHGFERQPUTSCD  elapsed: 00:00:05.4874540 by Swap
generation  1228 fitness   446 ABzyxwvutsqpromlnkjihgfedcbaYXWVONMLKJIHGFERQPUTSCD  elapsed: 00:00:05.5966821 by Swap
generation  1312 fitness   445 ABzyxwvutsrqponlmkjihgfedcbaYXWVONMLKJIHGFERQPUTSCD  elapsed: 00:00:05.9671800 by Reverse
generation  1382 fitness   438 ABzyxwvutsrqpomlnkjihgfedcbaYXWVONMKLJIHGFERQPUTSDC  elapsed: 00:00:06.2721440 by Reverse
generation  1618 fitness   435 ABzyxwvutsrqponmlkjihgfedcbaYXWVONMLKJIHGFERQPUTSDC  elapsed: 00:00:07.3276965 by Reverse
generation  1780 fitness   434 ABzyxwvutsrqpomnkljihgfedcbaYXWVUTQPONMLKJIHGFERSDC  elapsed: 00:00:08.0475115 by Swap
generation  1931 fitness   433 ABzyxwvutsrqpomlnkjihgfedcbaYXWVUTQPONMLKJIHGFERSDC  elapsed: 00:00:08.7351971 by Swap
generation  1972 fitness   429 ABzyxwvutsrqpomlnkjihgfedcbaYXWVUTSRQPONMKLJIHGFEDC  elapsed: 00:00:08.9238197 by Reverse
generation  2270 fitness   427 ABzyxwvutsrqpomlnkjihgfedcbaYXWVUTSRQPONMLKJIHGFEDC  elapsed: 00:00:10.3007169 by Reverse
generation  2402 fitness   426 ABzyxwvutsrqponmlkjihgfedcbaYXWVUTSRQPONMLKJIHGFEDC  elapsed: 00:00:10.9062857 by Reverse

Notice that Swap and Reverse are doing all the work once we get a unique set of genes. This makes sense because Mutation would introduce a duplicate gene, giving the fitness a 1000 point penalty. Crossover has the same problem.

Reverse and Swap are clearly inefficient when we get close to optimal. Consider what changed when going from fitness 429 to 426 in the set above. The sequence went from …omlnk… to …onmlk… gene n moved left 2 positions. Now how we would do that using only Swap and Reverse? It takes two calls to Reverse, and they have to be the right ones. That means we have a 1 in 51*50 chance of Reverse picking the sequence mln to reverse, and then another 1 in 51*50 chance of it picking the sequence lm to re-reverse in a later generation, assuming it picks that parent at all. We need another strategy that is more useful when the sequences are close to optimal.

A strategy that could produce the same result is to shift 1 gene 2 positions to the left – ml would move right one position and n would move to the original m position. This could be done by picking a random position and a random segment length to shift over one position. The probability of this strategy converting mln to nml would be the same as a single Reverse operation has of reversing that segment. Also, if the segment length was 1 it would be equivalent to swapping two adjacent genes – this would be useful for changing sequence …ih… to …hi…, as previously seen. Let’s call this strategy Shift and allow it to randomly shift a segment either left or right.

    public class ShiftStrategy : AbstractStrategy, IGeneticStrategy
    {
        public Individual CreateChild(Individual parentA, Individual parentB, string genes)
        {
            const int charsToShift = 1;
            if (parentA.Genes.Length < charsToShift + 1)
            {
                return parentA;
            }
            bool shiftingPairLeft = Random.Next(2) == 1;
            string childGenes;
            int segmentStart = Random.Next(parentA.Genes.Length - charsToShift - 1);
            int segmentLength = Random.Next(charsToShift + 1, parentA.Genes.Length + 1 - segmentStart);
            string childGenesBefore = parentA.Genes.Substring(0, segmentStart);

            if (shiftingPairLeft)
            {
                string shiftedSegment = parentA.Genes.Substring(segmentStart, segmentLength - charsToShift);
                string shiftedPair = parentA.Genes.Substring(segmentStart + segmentLength - charsToShift, charsToShift);
                string childGenesAfter = parentA.Genes.Substring(segmentStart + segmentLength);
                childGenes = childGenesBefore + shiftedPair + shiftedSegment + childGenesAfter;
            }
            else
            {
                string shiftedPair = parentA.Genes.Substring(segmentStart, charsToShift);
                string shiftedSegment = parentA.Genes.Substring(segmentStart + charsToShift, segmentLength - charsToShift);
                string childGenesAfter = parentA.Genes.Substring(segmentStart + segmentLength);
                childGenes = childGenesBefore + shiftedSegment + shiftedPair + childGenesAfter;
            }
            var child = new Individual
            {
                Genes = childGenes,
                Strategy = this,
                Parent = parentA
            };
            return child;
        }
    }

When we run benchmark the solver we can see the impact of the Shift strategy:

426 - 20 times
427 - 20 times
428 - 3 times
429 - 3 times
430 - 1 times
431 - 13 times
432 - 11 times
433 - 6 times
434 - 11 times
435 - 9 times
436 - 1 times
438 - 1 times
440 - 1 times

Lastly, we have lower and lower diversity in the parent pool as we approach optimal fitness. We need a more diverse set of sequences, particularly as we approach optimal, so that we have multiple paths (rather than one and only one path) that our strategies can take to get to the optimal solution.

If we introduce another feature of biology, A/B genetic lines, we can double it again.

        public string GetBest(int length,
                              string geneSet,
                              Func<string, int> getFitness,
                              Action<int, int, string, string> displayChild)
        {
            _randomStrategy = new RandomStrategy(length);

            int maxIndividualsInPool = geneSet.Length * 3;
            int generationCount = 1;
            var uniqueIndividuals = new HashSet<string>();
            var parents = GenerateParents(geneSet)
                .Where(x => uniqueIndividuals.Add(x.Genes))
                .Take(maxIndividualsInPool)
                .ToList();
            foreach (var individual in parents)
            {
                individual.Fitness = getFitness(individual.Genes);
            }
            parents = parents.OrderBy(x => x.Fitness).ToList();

            var bestParent = parents.First();
            displayChild(generationCount,
                         bestParent.Fitness,
                         bestParent.Genes,
                         bestParent.Strategy.Description);

            var children = GenerateChildren(parents, new[] { _randomStrategy }, geneSet)
                .Where(x => uniqueIndividuals.Add(x.Genes));

            int numberOfParentLines = 2;
            int lastParentLine = numberOfParentLines - 1;
            int currentParentLine = lastParentLine;

            var parentLines = Enumerable.Range(0, numberOfParentLines)
                .Select(x => parents.ToList())
                .ToArray();

            var strategies = Enumerable.Range(0, numberOfParentLines)
                .Select(x => _strategies.ToList())
                .ToArray();

            double halfMaxAllowableSecondsWithoutImprovement = MaxSecondsWithoutImprovement / 2;
            IEqualityComparer<Individual> individualGenesComparer = new IndividualGenesComparer();

            var stopwatch = new Stopwatch();
            stopwatch.Start();

            do
            {
                var potentialParents = new List<Individual>();
                foreach (var child in children
                    .TakeWhile(x => potentialParents.Count < maxIndividualsInPool))
                {
                    child.Fitness = getFitness(child.Genes);
                    potentialParents.Add(child);
                    if (bestParent.Fitness > child.Fitness)
                    {
                        bestParent = child;
                        var ancestors = GetAncestors(bestParent).ToList();
                        strategies[currentParentLine] = GetStrategyPool(ancestors);
                        if (ancestors.Count > maxIndividualsInPool)
                        {
                            maxIndividualsInPool = ancestors.Count;
                        }
                        displayChild(generationCount,
                                     bestParent.Fitness,
                                     bestParent.Genes,
                                     bestParent.Strategy.Description);
                    }
                }
                generationCount++;
                parentLines[currentParentLine] = parentLines[currentParentLine]
                    .Concat(potentialParents)
                    .OrderBy(x => x.Fitness)
                    .Take(maxIndividualsInPool)
                    .ToList();

                if (parentLines[currentParentLine][0].Fitness == bestParent.Fitness ||
                    stopwatch.Elapsed.TotalSeconds < halfMaxAllowableSecondsWithoutImprovement)
                {
                    if (--currentParentLine < 0)
                    {
                        currentParentLine = lastParentLine;
                    }
                }
                else
                {
                    parentLines[currentParentLine] = parentLines
                        .SelectMany(x => x.Take(5))
                        .Distinct(individualGenesComparer)
                        .ToList();
                }

                children = GenerateChildren(parentLines[currentParentLine], strategies[currentParentLine], geneSet)
                    .Where(x => uniqueIndividuals.Add(GetCanonicalGenes(x.Genes)));
            } while (bestParent.Fitness > 0 &&
                     stopwatch.Elapsed.TotalSeconds <= MaxSecondsWithoutImprovement);

            return bestParent.Genes;
        }

Now when we benchmark it we get the following results:

426 - 40 times
427 - 38 times
429 - 1 times
431 - 8 times
432 - 2 times
433 - 4 times
434 - 4 times
435 - 2 times
436 - 1 times

Another thing we can do is make the number of genetic lines configurable…

        public GeneticSolver()
        {
            _strategies = (from t in Assembly.GetExecutingAssembly().GetTypes()
                           where t.GetInterfaces().Contains(typeof(IGeneticStrategy))
                           where t.GetConstructor(Type.EmptyTypes) != null
                           select Activator.CreateInstance(t) as IGeneticStrategy).ToArray();

            _numberOfAncestorStrategiesToUse = 3 * _strategies.Length;

            GetCanonicalGenes = genes => genes;
            MaxSecondsWithoutImprovement = 20;
            NumberOfParentLines = 2;
        }
		public int NumberOfParentLines { get; set; }

and

            int lastParentLine = NumberOfParentLines - 1;
            int currentParentLine = lastParentLine;

            var parentLines = Enumerable.Range(0, NumberOfParentLines)
                .Select(x => parents.ToList())
                .ToArray();

            var strategies = Enumerable.Range(0, NumberOfParentLines)
                .Select(x => _strategies.ToList())
                .ToArray();

Now if we set NumberOfParentLines to 4 in RouteOptimizationSolver we can improve the grouping somewhat.

426 - 44 times
427 - 43 times
428 - 2 times
431 - 3 times
432 - 3 times
433 - 2 times
434 - 3 times

That is quite good enough for the time being.

If we run it with 10 parent lines we can get 95 percent optimal + pen-optimal results. Not bad at all.

You can download the final source code from my github repository.

When you’re ready to take it up a level (and to a different language), go on to part 4 or start with my implementation of this problem in Go.

Read Full Post »

In Part 1 we built our first genetic solver to generate the string “Hello World!”. This time we’re going to take on the slightly more interesting 8 Queens puzzle.

In the game of Chess, the Queen can attack across any number of unoccupied squares horizontally, vertically, or diagonally.

legal Queen moves on a Chess board

The 8 Queens puzzle asks us to place 8 Queens on a Chess board such that none of them are under attack. Take a couple of minutes to try to solve this with something physical like pennies on a paper Chess board to get a feel for how it might work.

It turns out getting 7 Queens into safe positions on a Chess board isn’t too difficult.

7 Chess Queens in safe positions on a Chess board

Getting 8 takes a bit more work. According to WikiPedia there are only 92 solutions to this puzzle and once we remove mirrorings and rotations there are only 12 unique solutions.

Clearly a straight iterative method is impractical – we’d be looking at 64 * 63 * 62 * 61 * 60 * 59 * 58 * 57 potential locations for the Queens assuming we don’t apply some logic to reduce the search space.

So how do we solve this genetically? We have to figure out a couple of things in order to set this up for our genetic solver.

  • what should our genes represent
  • how do we calculate partial fitness

In the previous post the genes were the solution, so we were able to use them without transformation. We could do the same thing with this puzzle by having genes represent rows and columns where Queens are placed – one gene for the row, and another for the column. But that introduces additional complexity into the solver that we don’t need yet. Instead, let’s keep the genes as one-to-one mappings of Queen locations through the use of a somewhat more complex Point encoding scheme. We’ll assign each square on the 8×8 Chess board a symbol from the 64 symbol set ([a-z][A-Z][0-9]@#) as follows:

board positions labelled first rows then columns

We’ll start off with the final code from the previous part in this series.

To convert a gene to a board position we find its index in the set of genes then convert that index to a row and column.

    public static class CharExtensions
    {
        public static Point ToPoint(this char gene)
        {
            const string genes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#";
            var index = genes.IndexOf(gene);
            var row = index / 8;
            var column = index % 8;
            return new Point(row, column);
        }
    }

Next we need a way to check all the board positions that could be attacked by a Queen from a particular board position. The Queen can attack in any of 8 different directions, which we can equate to Cardinal and Ordinal directions. We need to check each direction until another piece or the edge of the board is encountered. There are any number of ways to code this. Let’s try one that uses a variation of the Generate function from the previous post.

	public static class TExtensions
	{
		public static IEnumerable<T> GenerateFrom<T>(this T initial, Func<T,T> next) 
		{
			var current = initial;
			while (true)
			{
				yield return current;
				current = next(current);
			}
		}
	}

If we number the internal board starting at zero from the top left corner we can get the attackable points as follows:

    public class EightQueensSolver
    {
        public static int XOffsetEast = 1;
        public static int XOffsetWest = -1;

        public static int YOffsetNorth = -1;
        public static int YOffsetSouth = 1;

        public static int BoardWidth = 8;
        public static int BoardHeight = 8;

        public static IEnumerable<IEnumerable<Point>> GetAttackablePoints(Point queenPosition)
        {
            return new Func<Point,Point>[]
                {
                    GoNorth, GoNorthEast, GoEast, GoSouthEast, 
                    GoSouth, GoSouthWest, GoWest, GoNorthWest
                }
                .Select(direction => queenPosition
                    .GenerateFrom(direction)
                    .Skip(1)
                    .TakeWhile(IsOnTheBoard));
        }

        public static Point GoNorth(Point point)
        {
            return CreatePoint(point, 0, YOffsetNorth);
        }

        public static Point GoNorthEast(Point point)
        {
            return CreatePoint(point, XOffsetEast, YOffsetNorth);
        }

        public static Point GoEast(Point point)
        {
            return CreatePoint(point, XOffsetEast, 0);
        }

        public static Point GoSouthEast(Point point)
        {
            return CreatePoint(point, XOffsetEast, YOffsetSouth);
        }

        public static Point GoSouth(Point point)
        {
            return CreatePoint(point, 0, YOffsetSouth);
        }

        public static Point GoSouthWest(Point point)
        {
            return CreatePoint(point, XOffsetWest, YOffsetSouth);
        }

        public static Point GoWest(Point point)
        {
            return CreatePoint(point, XOffsetWest, 0);
        }

        public static Point GoNorthWest(Point point)
        {
            return CreatePoint(point, XOffsetWest, YOffsetNorth);
        }

        public static Point CreatePoint(Point point, int xOffset, int yOffset)
        {
            return new Point(point.X + xOffset, point.Y + yOffset);
        }

        public static bool IsOnTheBoard(Point point)
        {
            return point.X >= 0 && point.X < BoardWidth && point.Y >= 0 && point.Y < BoardHeight;
        }
    }

Next we need to determine the fitness of a particular gene sequence, or set of Queen positions. In the Hello World! problem we determined fitness by counting how many characters matched the desired result. In this case we don’t know the exact result, all we have are characteristics, so it would be better to count the negative. The equivalent for the Hello World! problem would be counting how many characters don’t match. When we get a zero fitness we’ve solved the problem. Let’s start by changing the GeneticSolver to use that logic for selecting the next parent.

        public string GetBest(int length, string geneSet, Func<string, int> getFitness, Action<int, int, string> displayChild)
        {
            int generationCount = 1;
            var parent = GenerateSequence(length, geneSet);
            var parentScore = getFitness(parent);
            displayChild(generationCount, parentScore, parent);
            while (parentScore > 0)
            {
                var child = Mutate(parent, geneSet);
                var childScore = getFitness(child);
                if (childScore < parentScore)
                {
                    parentScore = childScore;
                    parent = child;
                    displayChild(generationCount, childScore, child);
                }
                generationCount++;
            }
            return parent;
        }

Now the fitness can be a simple count of the number of Queens that can be attacked by any other Queen.

    public class EightQueensSolver
    {
        private const string GeneSet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#";
        private const int GeneCount = 8;

        public string Solve()
        {
            Func<string, int> getFitness = child =>
                {
                    var queenLocations =
                        new HashSet<Point>(child.Select(x => x.ToPoint(GeneSet, BoardWidth)));
                    int fitness =
                        queenLocations.Sum(
                            x =>
                            CountQueensAttacked(GetAttackablePoints(x), queenLocations));
                    return fitness;
                };
            // ...
        }

        private static int CountQueensAttacked(
            IEnumerable<IEnumerable<Point>> attackablePoints, 
            ICollection<Point> queenLocations)
        {
            return attackablePoints.Count(direction => direction.Any(queenLocations.Contains));
        }

        // ...

which necessitates a change in the ToPoint function

    public static class CharExtensions
    {
        public static Point ToPoint(this char gene, string geneSet, int width)
        {
            int index = geneSet.IndexOf(gene);
            int row = index/width;
            int column = index%width;
            return new Point(row, column);
        }
    }

Finally, we need a way to visualize the Queen positions on the board.

        public string Solve()
        {
            Func<string, int> getFitness = child =>
                {
                    var queenLocations = new HashSet<Point>(child.Select(x => x.ToPoint(GeneSet, BoardWidth)));
                    int fitness = queenLocations.Sum(x => CountQueensAttacked(GetAttackablePoints(x), queenLocations));
                    return fitness;
                };

            var stopwatch = new Stopwatch();
            stopwatch.Start();
            Action<int, int, string> displayCurrentBest =
                (generation, fitness, item) =>
                    {
                        var board = new char?[BoardHeight,BoardWidth];

                        foreach (var queenLocation in item.Select(x => x.ToPoint(GeneSet, BoardWidth)))
                        {
                            board[queenLocation.X, queenLocation.Y] = 'Q';
                        }

                        for (int i = 0; i < BoardHeight; i++)
                        {
                            for (int j = 0; j < BoardWidth; j++)
                            {
                                Console.Write(board[i, j] ?? '.');
                            }
                            Console.WriteLine();
                        }
                        Console.WriteLine("generation\t{0} fitness\t{1} elapsed: {2}",
                                          generation.ToString().PadLeft(5, ' '),
                                          fitness.ToString().PadLeft(2, ' '),
                                          stopwatch.Elapsed);
                    };
            string result = new GeneticSolver().GetBest(geneCount,
                                                        GeneSet,
                                                        getFitness,
                                                        displayCurrentBest);
            Console.WriteLine(result);
            return result;
        }

And we’re ready to run it. Let’s see what happens.

        public static void Main()
        {
            new EightQueensSolver().Solve();
        }

output:

...Q..Q.
........
........
..QQ.Q..
........
......Q.
Q.......
.Q......
generation        1 fitness    64 elapsed: 00:00:00.1066393
...Q....
........
........
..QQ.Q..
........
......Q.
Q.......
.Q......
generation       15 fitness    56 elapsed: 00:00:00.1371373
...Q....
........
........
..QQ.Q..
........
......Q.
........
.Q......
generation       31 fitness    48 elapsed: 00:00:00.1701097
........
........
........
..QQ.Q..
........
......Q.
........
.Q......
generation       50 fitness    40 elapsed: 00:00:00.1983747
........
........
........
..QQ....
........
......Q.
........
.Q......
generation       72 fitness    32 elapsed: 00:00:00.2252662
........
........
........
..QQ....
........
......Q.
........
........
generation      254 fitness    24 elapsed: 00:00:00.2629383

Wait, there are only 3 queens! What’s happening? Remember the frog? Nature is really good a filling vacuums, and so are genetic solvers. Our fitness function didn’t specify that the 8 Queens had to occupy different positions and the solver found that hole and optimized for it. So what do we do about it? Let’s return a very high value when two Queens occupy the same position so that it never happens.

            Func<string, int> getFitness = child =>
            {
                var queenLocations = new HashSet<Point>(child.Select(x => x.ToPoint(GeneSet, BoardWidth)));
                var fitness = queenLocations.Sum(x => CountQueensAttacked(GetAttackablePoints(x), queenLocations));
                fitness += 10000 * (GeneCount - queenLocations.Count);
                return fitness;
            };

Now run it again and get output like the following:

...Q...Q
..Q.QQ..
........
........
......Q.
...Q....
........
......Q.
generation          1 fitness   14 elapsed: 00:00:00.0539012
.......Q
..Q.QQ.Q
........
........
......Q.
...Q....
........
......Q.
generation          2 fitness   12 elapsed: 00:00:00.0568409
.......Q
..Q.QQ..
........
...Q....
......Q.
...Q....
........
......Q.
generation         28 fitness   10 elapsed: 00:00:00.0639691
.......Q
..Q.Q...
........
...Q....
......Q.
...Q....
.Q......
......Q.
generation        111 fitness    8 elapsed: 00:00:00.0836195
.......Q
..Q.Q...
Q.......
...Q....
......Q.
........
.Q......
......Q.
generation        575 fitness    6 elapsed: 00:00:00.1559271

We can let this run a long, long time and it won’t make much if any progress beyond a certain point. It stalls like our first solution to Hello world!. Clearly, randomly changing one Queen location at a time isn’t going to get us there. We’re going to have to improve our solver.

We currently have one Strategy – Mutation. We’re going to add another Strategy from nature – Cross Breeding successful individuals. In order to do that we’ll need a pool of individuals to select from. We’ll generate a number of children in each generation instead of just one and cross breed the best ones by randomly copying one gene from the second parent in place of the gene in the first parent – this is known as Crossover.

If we’re going to have a number of individuals, we’ll need a way to associate a fitness with each individual.

    public class Individual
    {
        public string Genes;
        public int Fitness;
    }

Next we’ll add a convenience method to the genetic solver for generating parents

        private IEnumerable<Individual> GenerateParents(int length, string geneSet)
        {
            Func<Individual> next = () => new Individual
                {
                    Genes = GenerateSequence(length, geneSet)
                };
            var initial = next();
            return initial.Generate(next);
        }

And another for generating the next generation using a particular strategy.

        private IEnumerable<Individual> GenerateChildren(
            IList<Individual> parents,
            Func<Individual, Individual, string, Individual> strategy,
            string geneSet)
        {
            int count = 0;
            while (count < parents.Count)
            {
                int parentAIndex = _random.Next(parents.Count);
                int parentBIndex = _random.Next(parents.Count);
                if (parentAIndex == parentBIndex)
                {
                    continue;
                }
                var parentA = parents[parentAIndex];
                var parentB = parents[parentBIndex];
                yield return strategy(parentA, parentB, geneSet);
                count++;
            }
        }

Then we’ll change the implementation of Mutate to be compatible

       private Individual Mutate(Individual parentA, Individual parentB, string geneSet)
        {
            var parentGenes = parentA.Genes.ToCharArray();
            int location = _random.Next(0, parentGenes.Length);
            parentGenes[location] = geneSet[_random.Next(0, geneSet.Length)];
            return new Individual { Genes = new String(parentGenes) };
        }

and add a Crossover implementation that starts with all the genes from parent A and replaces one with a gene from parent B.

        private Individual Crossover(Individual parentA, Individual parentB, string geneSet)
        {
            int crossOverPoint = _random.Next(parentA.Genes.Length);
            var childGenes = parentA.Genes.ToCharArray();
            var parentBGenes = parentB.Genes.ToCharArray();
            childGenes[crossOverPoint] = parentBGenes[crossOverPoint];
            var child = new Individual
            {
                Genes = new String(childGenes)
            };
            return child;
        }

Next we’ll revise the implementation of GetBest to use a pool of parents to generate children using the Crossover strategy. If none of the children has a fitness at least as good as the worst parent then we’ll generate the next generation using the Mutation strategy. We’ll also introduce a constraint that none of the generated individuals be identical to any previously generated individual. This helps to maintain the diversity of our gene pool. We’ll use a gene pool of three times the number of items in the geneSet.

        public string GetBest(int length,
                              string geneSet,
                              Func<string, int> getFitness,
                              Action<int, int, string> displayChild)
        {
            int maxIndividualsInPool = geneSet.Length * 3;
            int generationCount = 1;
            var uniqueIndividuals = new HashSet<string>();
            var parents = GenerateParents(length, geneSet)
                .Where(x => uniqueIndividuals.Add(x.Genes))
                .Take(maxIndividualsInPool)
                .ToList();
            foreach (var individual in parents)
            {
                individual.Fitness = getFitness(individual.Genes);
            }
            parents = parents.OrderBy(x => x.Fitness).ToList();

            displayChild(generationCount, parents.Last().Fitness, parents.Last().Genes);

            int worstParentFitness = parents.Last().Fitness;
            var children = GenerateChildren(parents, Crossover, geneSet);
            do
            {
                var improved = new List<Individual>();
                foreach (var child in children.Where(x => uniqueIndividuals.Add(x.Genes)))
                {
                    child.Fitness = getFitness(child.Genes);
                    if (worstParentFitness >= child.Fitness)
                    {
                        improved.Add(child);
                        if (worstParentFitness > child.Fitness)
                        {
                            displayChild(generationCount, child.Fitness, child.Genes);
                            worstParentFitness = child.Fitness;
                        }
                    }
                }
                generationCount++;
                if (improved.Any())
                {
                    parents = parents
                        .Concat(improved)
                        .OrderBy(x => x.Fitness)
                        .Take(maxIndividualsInPool)
                        .ToList();
                    children = GenerateChildren(parents, Crossover, geneSet);
                }
                else
                {
                    children = GenerateChildren(parents, Mutate, geneSet);
                }
            } while (parents[0].Fitness > 0);
            return parents[0].Genes;
        }

Finally, let’s also add a space between the visual board positions to make the displayed board look more square.

                        for(int i = 0; i < BoardHeight; i++)
                        {
                            for(int j = 0; j < BoardWidth; j++)
                            {
                                Console.Write(board[i,j]??'.');
                                Console.Write(' ');
                            }
                            Console.WriteLine();
                        }

Now run it again and get results like the following:

. . . . . . . . 
. . . . . . . . 
. . . . . . . . 
. . . . Q . . .
Q Q . . . . Q . 
. . Q . . . . .
. . . . . . . . 
. . . . . . . . 
generation          1 fitness   30008 elapsed: 00:00:00.1161487
. . . . Q Q Q . 
. . . . . . . . 
. . . . . . . . 
. . . . . . . . 
Q Q . . . . . . 
. . Q . . . . Q
. . . . . . . . 
. . . . . . . . 
generation          1 fitness   10014 elapsed: 00:00:00.1220378
. . Q . . . . .
. . . . . . . . 
. . . . . . . . 
Q . . . . . . Q
Q . . Q . . . . 
. . Q . . . . .
. . . Q . . . . 
. . Q . . . . .
generation          1 fitness   20 elapsed: 00:00:00.1248852
. . . . Q . . Q
. . Q . . . . .
. . . . . Q . . 
. . . . . . . . 
. . . . . . . . 
. . . Q . . . . 
. . . . Q Q . . 
. . Q . . . . .
generation          1 fitness   14 elapsed: 00:00:00.1344589
. . . . . . . . 
. . . . . . . . 
. . . Q Q . . .
. . . . . . Q . 
. . Q . . . . .
. Q . . . Q . . 
Q . . . . . . .
. . . . . . . Q 
generation          1 fitness   12 elapsed: 00:00:00.1451020
Q . . . Q . . .
. . . . . . Q . 
. . . . Q . . .
. . Q . . . . .
. Q . . . Q . . 
. . . . . . . . 
. . . . Q . . .
. . . . . . . . 
generation          1 fitness   10 elapsed: 00:00:00.1546707
Q . . . Q . . .
. . . . . . Q . 
. . . . Q . . .
. . Q . . . . .
. . . . . Q . . 
. Q . . . . . . 
. . . . Q . . .
. . . . . . . . 
generation          2 fitness    8 elapsed: 00:00:00.2066084
. . . . Q . . .
. . . . . . Q . 
. . . . Q . . .
. . Q . . . . .
. . . . . Q . . 
. Q . . . . . . 
. . . . Q . . .
. . . . . . . Q 
generation          3 fitness    6 elapsed: 00:00:00.2303158
Q . . . . . . .
. . . . . . Q . 
. . . . . . Q . 
. . Q . . . . .
. . . . . Q . Q 
. Q . . . . . . 
. . . . Q . . .
. . . . . . . . 
generation         21 fitness    4 elapsed: 00:00:00.7416604
. . . Q . . . . 
. . . . . . Q . 
. . . . . . Q . 
. . Q . . . . .
. . . . . Q . . 
. Q . . . . . . 
. . . . Q . . .
. . . . . . . Q 
generation         99 fitness    2 elapsed: 00:00:02.7303553
. . . Q . . . . 
. . . . . . . Q 
Q . . . . . . .
. . Q . . . . .
. . . . . Q . . 
. Q . . . . . . 
. . . . . . Q . 
. . . . Q . . .
generation        316 fitness    0 elapsed: 00:00:06.9379407
A8pdL2Pq

Success!

As a final modification, let’s change the method to return null if it finds a non-optimal solution and sort the position codes in the result since their order doesn’t matter.

            return getFitness(result) == 0 
                ? new String(result.OrderBy(x => x).ToArray()) 
                : null;

You can download the final source code from my github repository.

Go on to Part 3 or checkout my implementation of this problem in Go

Read Full Post »

Nature rewards survival with genetic replication. If a frog lives until it is mature enough to reproduce then it is considered successful. Its descendants will inherit some of its genes and some from its mate. If through this genetic variation a descendant develops a contact toxin on its skin, the frog species could, however, become overabundant due to lack of predators. Like the natural process for which they are named, genetic algorithms work through successive approximation towards a goal and breeding successful but not overly specialized descendants.

Problem to solve:

Without hard coding, generate the string “Hello World!”

Round 1: generate random strings until we match the expected value.

    internal class Program
    {
        public static void Main()
        {
            new StringDuplicator().Duplicate("Hello world!");
        }
    }

    public class StringDuplicator
    {
        public string Duplicate(string toMatch)
        {
            var result = new GeneticSolver().GetBest();
            Console.WriteLine(result);
            return result;
        }
    }

    public class GeneticSolver
    {
        readonly Random _random = new Random();
        public string GetBest(string toMatch)
        {
            //...
        }
    }

How does GetBest() know how many characters to generate? We could get that from the string itself with toMatch.Length. How does GetBest() know which characters to use? Does it get those from the string too? Does it use all possible characters or only printable ones? Let’s be more specific.

Round 1 revised: given # of genes, gene set, and expected result, generate random strings until we match the expected value.

    public static class TExtensions
    {
        public static IEnumerable<T> Generate<T>(this T initial, Func<T> next)
        {
            var current = initial;
            while (true)
            {
                yield return current;
                current = next();
            }
        }
    }

    public class GeneticSolver
    {
        private readonly Random _random = new Random();

        private string GenerateSequence(int length, string geneSet)
        {
            Func<char> next = () => geneSet[_random.Next(0, geneSet.Length)];
            char initial = next();
            return new String(initial.Generate(next).Take(length).ToArray());
        }

        public string GetBest(int length, string geneSet, string toMatch)
        {
            string generated = GenerateSequence(length, geneSet);
            while (generated != toMatch)
            {
                generated = GenerateSequence(length, geneSet);
            }
            return generated;
        }
    }

    public class StringDuplicator
    {
        public string Duplicate(string toMatch)
        {
            var solver = new GeneticSolver();
            string geneSet = new String(toMatch.Distinct().ToArray());
            string result = solver.GetBest(toMatch.Length,
                                           geneSet,
                                           toMatch);
            Console.WriteLine(result);
            return result;
        }
    }

Will it work? Probably not in our lifetime. We’re looking at 1 chance in 54^12 that it will generate the expected result. That is the problem with all or nothing fitness. Here we’ve said “get it exactly right or try again”. Few problems are likely to be simple enough that we can evolve a solution based solely on success or failure. Usually we’ll also need a measure of near success. This implies two scoring systems: one for unsuccessful individuals, and another for optimizing successful ones. These can often be combined.

This time, instead of all-or-nothing we’ll use a fitness function that gives points for the number of characters that are correct.

    public class StringDuplicator
    {
        public string Duplicate(string toMatch)
        {
            var solver = new GeneticSolver();
            int geneCount = toMatch.Length;
            Func<string, int> getFitness = child =>
                {
                    int matches = Enumerable.Range(0, geneCount)
                        .Count(x => child[x] == toMatch[x]);
                    return matches;
                };
            string geneSet = new String(toMatch.Distinct().ToArray());
            string result = solver.GetBest(toMatch.Length,
                                           geneSet,
                                           getFitness);
            Console.WriteLine(result);
            return result;
        }
    }

The second thing we need is a way to breed better results from previous results. Instead of generating completely random children we’ll generate an initial parent and create a child by taking the parent’s genes and making one random change. When the child gets a better fitness result than the current parent we make it the new parent.

Round 2: given # of genes, gene set, and a fitness function, generate random strings until we match the expected value.

        public string GetBest(int length, string geneSet, Func<string, int> getFitness)
        {
            string parent = GenerateSequence(length, geneSet);
            int parentScore = getFitness(parent);
            while (parentScore != length)
            {
                string child = Mutate(parent, geneSet);
                int childScore = getFitness(child);
                if (childScore > parentScore)
                {
                    parentScore = childScore;
                    parent = child;
                }
            }
            return parent;
        }

        private string Mutate(string parent, string geneSet)
        {
            int location = _random.Next(0, parent.Length);
            var parentGenes = parent.ToCharArray();
            parentGenes[location] = geneSet[_random.Next(0, geneSet.Length)];
            return new String(parentGenes);
        }

This time it runs in a fraction of a second… but how did it do it? Let’s pass in an action that we can call to display each successive child improvement. Let’s also track the number of generations and the elapsed time.

        public string GetBest(int length, string geneSet, Func<string, int> getFitness, Action<int, int, string> displayChild)
        {
            int generationCount = 1;
            var parent = GenerateSequence(length, geneSet);
            var parentScore = getFitness(parent);
            displayChild(generationCount, parentScore, parent);
            while (parentScore != length)
            {
                var child = Mutate(parent, geneSet);
                var childScore = getFitness(child);
                if (childScore > parentScore)
                {
                    parentScore = childScore;
                    parent = child;
                    displayChild(generationCount, parentScore, parent);
                }
                generationCount++;
            }
            return parent;
        }
        public string Duplicate(string toMatch)
        {
            var solver = new GeneticSolver();
            int geneCount = toMatch.Length;
            Func<string, int> getFitness = child =>
                {
                    int matches = Enumerable.Range(0, geneCount)
                        .Count(x => child[x] == toMatch[x]);
                    return matches;
                };
            string geneSet = new String(toMatch.Distinct().ToArray());
            var stopwatch = new Stopwatch();
            stopwatch.Start();
            Action<int, int, string> displayCurrentBest =
                (generation, fitness, item) =>
                Console.WriteLine("generation\t{0} fitness\t{1} {2}\telapsed: {3}",
                                  generation.ToString().PadLeft(5, ' '),
                                  fitness.ToString().PadLeft(TotalWidth(toMatch), ' '),
                                  item,
                                  stopwatch.Elapsed);

            string result = solver.GetBest(toMatch.Length,
                                           geneSet,
                                           getFitness,
                                           displayCurrentBest);
            Console.WriteLine(result);
            return result;
        }

    	private static int TotalWidth(string expected)
    	{
    		return (int)Math.Floor(Math.Log10(Math.Abs(expected.Length != 0 ? expected.Length : 1))) + 1;
    	}

Run it now and we get output like the following:

generation          1 fitness    0 rl!dHdrwl  r elapsed: 00:00:00.0093788
generation          4 fitness    1 re!dHdrwl  r elapsed: 00:00:00.0110906
generation         18 fitness    2 He!dHdrwl  r elapsed: 00:00:00.0112696
generation         20 fitness    3 He!lHdrwl  r elapsed: 00:00:00.0115229
generation         24 fitness    4 He!lHdrol  r elapsed: 00:00:00.0117862
generation         31 fitness    5 He!lHdroll r elapsed: 00:00:00.0120102
generation         52 fitness    6 He!lHdrorl r elapsed: 00:00:00.0122658
generation         97 fitness    7 He!lodrorl r elapsed: 00:00:00.0124217
generation        125 fitness    8 He!lodrorl ! elapsed: 00:00:00.0125791
generation        213 fitness    9 He!lodrorld! elapsed: 00:00:00.0127173
generation        234 fitness   10 Hellodrorld! elapsed: 00:00:00.0128413
generation        327 fitness   11 Hello rorld! elapsed: 00:00:00.0129618
generation        460 fitness   12 Hello world! elapsed: 00:00:00.0131015
Hello world!

You can download the final source code from my github repository.

Experiment with longer strings and larger gene sets.

Welcome to the world of genetic algorithms!

Move on to Part 2 or check out my implementation in Go.

Read Full Post »

The core application will be a simple calculator that can add, subtract, multiply and divide two double values. The output will be a JSON encoded POCO containing the calculated value OR an error message.

First the POCO:

[DataContract]
public class CalculationResult
{
	[DataMember] public double Answer;

	[DataMember] public string Message;
}

The attributes are used by WCF. We’ll be seeing a lot more of those. Next, the service contract:

[ServiceContract(SessionMode = SessionMode.NotAllowed)]
public interface ICalculator
{
	[WebGet(UriTemplate = "Add/{n1}/{n2}")]
	[OperationContract]
	CalculationResult Add(string n1, string n2);

	[WebGet(UriTemplate = "Subtract/{n1}/{n2}")]
	[OperationContract]
	CalculationResult Subtract(string n1, string n2);

	[WebGet(UriTemplate = "Multiply/{n1}/{n2}")]
	[OperationContract]
	CalculationResult Multiply(string n1, string n2);

	[WebGet(UriTemplate = "Divide/{n1}/{n2}")]
	[OperationContract]
	CalculationResult Divide(string n1, string n2);
}

The WebGet attribute configures WCF to let us call the method from an HTTP GET request, and the UriTemplate property lets us configure the structure of that URL for automatic parameter parsing.

We’re receiving strings but need to convert them to doubles, so here’s an extension method to do that:

public static class StringExtensions
{
	public static double? ToDouble(this string input)
	{
		double value1;
		return !Double.TryParse(input, out value1) ? (double?) null : value1;
	}
}

Next is the Calculator implementation:

[ServiceBehavior(IncludeExceptionDetailInFaults = false,
	AddressFilterMode = AddressFilterMode.Any,
	InstanceContextMode = InstanceContextMode.Single,
	ConcurrencyMode = ConcurrencyMode.Single)]
public class Calculator : ICalculator
{
	public CalculationResult Add(string n1, string n2)
	{
		Console.WriteLine("received request to Add " + n1 + " to " + n2);
		Func<double, double, double> add = (value1, value2)
		      => (value1 + value2);
		return Calculate(n1, n2, add);
	}

	public CalculationResult Subtract(string n1, string n2)
	{
		Console.WriteLine("received request to Subtract "+n2+" from "+n1);
		Func<double, double, double> subtract = (value1, value2)
		      => (value1 - value2);
		return Calculate(n1, n2, subtract);
	}

	public CalculationResult Multiply(string n1, string n2)
	{
		Console.WriteLine("received request to Multiply "+n1+" by "+n2);
		Func<double, double, double> multiply = (value1, value2)
		      => (value1 * value2);
		return Calculate(n1, n2, multiply);
	}

	public CalculationResult Divide(string n1, string n2)
	{
		Console.WriteLine("received request to Divide " + n1 + " by " + n2);
		Func<double, double, double> divide = (value1, value2)
		      => value2 == 0 ? Double.NaN : (value1 / value2);
		return Calculate(n1, n2, divide);
	}

	private static CalculationResult Calculate(
		string n1,
		string n2,
		Func<double, double, double> calculate)
	{
		var value1 = n1.ToDouble();
		if (!value1.HasValue)
		{
			return GetCouldNotConvertToDoubleResult(n1);
		}

		var value2 = n2.ToDouble();
		if (!value2.HasValue)
		{
			return GetCouldNotConvertToDoubleResult(n2);
		}

		double result = calculate(value1.Value, value2.Value);
		return new CalculationResult
			{
				Answer = result
			};
	}

	private static CalculationResult GetCouldNotConvertToDoubleResult(string input)
	{
		return new CalculationResult
			{
				Message = "Could not convert '" + input + "' to a double"
			};
	}
}

Now that we have a WCF attribute decorated service we need a way to spin it up and make it HTTP accessible. I’ve chosen to encapsulate that process so I can easily apply it to any service I want to expose. You can see that most of the code is error handling and logging related.

public class WcfServiceWrapper<TServiceImplementation, TServiceContract>
	: ServiceBase
	where TServiceImplementation : TServiceContract
{
	private readonly string _serviceUri;
	private ServiceHost _serviceHost;

	public WcfServiceWrapper(string serviceName, string serviceUri)
	{
		_serviceUri = serviceUri;
		ServiceName = serviceName;
	}

	protected override void OnStart(string[] args)
	{
		Start();
	}

	protected override void OnStop()
	{
		Stop();
	}

	public void Start()
	{
		Console.WriteLine(ServiceName + " starting...");
		bool openSucceeded = false;
		try
		{
			if (_serviceHost != null)
			{
				_serviceHost.Close();
			}

			_serviceHost = new ServiceHost(typeof(TServiceImplementation));
		}
		catch (Exception e)
		{
			Console.WriteLine("Caught exception while creating " + ServiceName + ": " + e);
			return;
		}

		try
		{
			var webHttpBinding = new WebHttpBinding(WebHttpSecurityMode.None);
			_serviceHost.AddServiceEndpoint(typeof(TServiceContract), webHttpBinding, _serviceUri);

			var webHttpBehavior = new WebHttpBehavior
				{
					DefaultOutgoingResponseFormat = WebMessageFormat.Json
				};
			_serviceHost.Description.Endpoints[0].Behaviors.Add(webHttpBehavior);

			_serviceHost.Open();
			openSucceeded = true;
		}
		catch (Exception ex)
		{
			Console.WriteLine("Caught exception while starting " + ServiceName + ": " + ex);
		}
		finally
		{
			if (!openSucceeded)
			{
				_serviceHost.Abort();
			}
		}

		if (_serviceHost.State == CommunicationState.Opened)
		{
			Console.WriteLine(ServiceName + " started at " + _serviceUri);
		}
		else
		{
			Console.WriteLine(ServiceName + " failed to open");
			bool closeSucceeded = false;
			try
			{
				_serviceHost.Close();
				closeSucceeded = true;
			}
			catch (Exception ex)
			{
				Console.WriteLine(ServiceName + " failed to close: " + ex);
			}
			finally
			{
				if (!closeSucceeded)
				{
					_serviceHost.Abort();
				}
			}
		}
	}

	public new void Stop()
	{
		Console.WriteLine(ServiceName + " stopping...");
		try
		{
			if (_serviceHost != null)
			{
				_serviceHost.Close();
				_serviceHost = null;
			}
		}
		catch (Exception ex)
		{
			Console.WriteLine("Caught exception while stopping " + ServiceName + ": " + ex);
		}
		finally
		{
			Console.WriteLine(ServiceName + " stopped...");
		}
	}
}

Lastly we’ll use TopShelf to turn the whole thing into a Windows Service. One great feature of TopShelf is our program can be a simple console application and therefore easy to debug.

internal class Program
{
	private static void Main(string[] args)
	{
		const string serviceUri = "http://localhost:10000/calc";
		var host = HostFactory.New(c =>
			{
				c.Service<WcfServiceWrapper<Calculator, ICalculator>>(s =>
					{
						s.SetServiceName("CalculatorService");
						s.ConstructUsing(x => 
							new WcfServiceWrapper<Calculator, ICalculator>("Calculator", serviceUri));
						s.WhenStarted(service => service.Start());
						s.WhenStopped(service => service.Stop());
					});
				c.RunAsLocalSystem();

				c.SetDescription("Runs CalculatorService.");
				c.SetDisplayName("CalculatorService");
				c.SetServiceName("CalculatorService");
			});

		Console.WriteLine("Hosting ...");
		host.Run();
		Console.WriteLine("Done hosting ...");
	}
}

That’s it for the code. Let’s try it out. First run it as a console application (you’ll need to do this as an Administrator otherwise WCF won’t have security rights to listen on the port).

>Calculator.exe
Hosting ...
Calculator starting...
Calculator started at http://localhost:10000/calc

Now bring up a browser and ask the service to add two numbers:

http://localhost:10000/calc/Add/4.7/5.2

On the console we see

received request to Add 4.7 to 5.2

and in the browser we see the result

{"Answer":9.9,"Message":null}

Hit Control-C in the console window to shut down the calculator service.

Calculator stopping...
Calculator stopped...
Done hosting ...

Next let’s install the calculator as a Windows Service.

>Calculator.exe install
Hosting ...

Running a transacted installation.

Beginning the Install phase of the installation.
Installing service CalculatorService...
Service CalculatorService has been successfully installed.
Creating EventLog source CalculatorService in log Application...

The Install phase completed successfully, and the Commit phase is beginning.

The Commit phase completed successfully.

The transacted install has completed.
Done hosting ...

Now open the Windows Services snap-in and you’ll see CalculatorService in the list. Start it up then hit the service with your browser again.

http://localhost:10000/calc/Add/4.7/5.3

with response

{"Answer":10,"Message":null}

Success!

To uninstall the CalculatorService run it again from a command window with the uninstall argument. You don’t even have to stop the server first as TopShelf will do that for you.

>Calculator.exe uninstall
Hosting ...


The uninstall is beginning.
Removing EventLog source CalculatorService.
Service CalculatorService is being removed from the system...
Service CalculatorService was successfully removed from the system.
Attempt to stop service CalculatorService.

The uninstall has completed.
Done hosting ...

Enjoy!

30 Jun 2011 – Updated to DSL used in TopShelf version 2.2.1.1

Read Full Post »

Let’s build a Sort method that uses the quicksort algorithm. Note, we’re not trying to use TDD to derive the quicksort algorithm.

Step 1 – test for a null input

Stub the test

[TestFixture]
public class When_asked_to_sort_a_list
{
    [Test]
    public void Given_null()
    {
        Test.Verify(
            with_a_null_input,
            when_asked_to_sort_the_input,
            should_throw_an_ArgumentNullException
            );
    }
	
	public void Sort<T>(IList<T> input)
    {
        throw new NotImplementedException();
	}
}

next fill in the implied details

    private Exception _exception;
    private List<char> _input;

    [SetUp]
    public void BeforeEachTest()
    {
        _input = null;
        _exception = null;
    }
	
    private void with_a_null_input()
    {
        _input = null;
    }

    private void when_asked_to_sort_the_input()
    {
        try
        {
            Sort(_input);
        }
        catch (Exception <span class="hiddenGrammarError" pre="">exception)
        {
            _exception</span> = exception;
        }
    }

    private void should_throw_an_ArgumentNullException()
    {
        _exception.ShouldNotBeNull();
        _exception.ShouldBeOfType<ArgumentNullException>();
    }

run the test

WITH a null input
WHEN asked to sort the input
SHOULD throw an  Argument Null Exception - FAILED

then build the necessary implementation.

    public void Sort<T>(IList<T> input)
    {
        if (input == null)
        {
            throw new ArgumentNullException();
        }
        throw new NotImplementedException();
    }

Step 2 – test for an empty input

Write the test

    [Test]
    public void Given_an_empty_list()
    {
        Test.Verify(
            with_an_empty_list,
            when_asked_to_sort_the_input,
            should_not_throw_an_Exception
            );
    }
	
    private void with_an_empty_list()
    {
        _input = new List<char>();
    }

    private void should_not_throw_an_Exception()
    {
        _exception.ShouldBeNull();
    }	

run the test

WITH an empty list
WHEN asked to sort the input
SHOULD not throw an  Exception - FAILED

FluentAssert.Exceptions.ShouldBeNullAssertionException :   Expected: null
  But was:  System.NotImplementedException: The method or operation is not implemented.

then update the implementation.

    public void Sort<T>(IList<T> input)
    {
        if (input == null)
        {
            throw new ArgumentNullException();
        }
        if (!input.Any())
        {
            return;
        }
        throw new NotImplementedException();
    }

Step 3 – test for a single-element input

    [Test]
    public void Given_a_single_element_list()
    {
        Test.Verify(
            with_a_list,
            that_has_element__A,
            when_asked_to_sort_the_input,
            should_not_throw_an_Exception,
            the_element_of_the_list_should_be__A
            );
    }

    private void with_a_list()
    {
        with_an_empty_list();
    }

    private void that_has_element__A()
    {
        _input.Add('A');
    }

    private void the_element_of_the_list_should_be__A()
    {
        _input.ShouldContainAllInOrder(new[] { 'A' });
    }	

run the test

WITH a list
that has element   A - PASSED
WHEN asked to sort the input
SHOULD not throw an  Exception - FAILED

FluentAssert.Exceptions.ShouldBeNullAssertionException :   Expected: null
  But was:  System.NotImplementedException: The method or operation is not implemented.

update the implementation.

    public void Sort<T>(IList<T> input)
    {
        if (input == null)
        {
            throw new ArgumentNullException();
        }
        if (input.Count <= 1)
        {
            return;
        }
        throw new NotImplementedException();
    }

Step 4a – test sorting list { A B }

    [Test]
    public void Given_a_list_with_elements__A_B()
    {
        Test.Verify(
            with_a_list,
            that_starts_with_element__A,
            followed_by_element__B,
            when_asked_to_sort_the_input,
            should_not_throw_an_Exception,
            the_elements_of_the_sorted_list_should_be__A_B__in_that_order
            );
    }

    private void that_starts_with_element__A()
    {
        that_has_element__A();
    }

    private void followed_by_element__B()
    {
        _input.Add('B');
    }

    private void the_elements_of_the_sorted_list_should_be__A_B__in_that_order()
    {
        _input.ShouldContainAllInOrder("AB".ToArray());
    }

run the test

WITH a list
that starts with element   A - PASSED
followed by element   B - PASSED
WHEN asked to sort the input
SHOULD not throw an  Exception - FAILED

FluentAssert.Exceptions.ShouldBeNullAssertionException :   Expected: null
  But was:  System.NotImplementedException: The method or operation is not implemented.

update the implementation.

    public void Sort<T>(IList<T> input)
    {
        if (input == null)
        {
            throw new ArgumentNullException();
        }
        if (input.Count <= 1)
        {
            return;
        }

    }

Step 4b – test sorting list { B A }

    [Test]
    public void Given_a_list_with_elements__B_A()
    {
        Test.Verify(
            with_a_list,
            that_starts_with_element__B,
            followed_by_element__A,
            when_asked_to_sort_the_input,
            should_not_throw_an_Exception,
            the_elements_of_the_sorted_list_should_be__A_B__in_that_order
            );
    }

    private void that_has_element__B()
    {
        _input.Add('B');
    }

    private void followed_by_element__B()
    {
        that_has_element__B();
    }

    private void that_starts_with_element__B()
    {
        that_has_element__B();
    }

    private void followed_by_element__A()
    {
        that_has_element__A();
    }

run the test

WITH a list
that starts with element   B - PASSED
followed by element   A - PASSED
WHEN asked to sort the input
SHOULD not throw an  Exception - PASSED
the elements of the sorted list should be   A  B  in that order - FAILED

FluentAssert.Exceptions.ShouldBeEqualAssertionException : at offset 0

update the implementation.

    public void Sort<T>(IList<T> input) 
        where T : IComparable
    {
        if (input == null)
        {
            throw new ArgumentNullException();
        }
        if (input.Count <= 1)
        {
            return;
        }
        SwapIfOutOfOrder(input, 0, 1);
    }

    private static void SwapIfOutOfOrder<T>(IList<T> input, int first, int second)
        where T : IComparable
    {
        if (input[first].CompareTo(input[second]) == 1)
        {
            var temp = input[first];
            input[first] = input[second];
            input[second] = temp;
        }
    }

Step 4c – track comparisons

In order to verify that the sort algorithm used by Sort is quicksort we need the ability to introspect what it is doing. Verifying that the output is sorted is insufficient because it could be using some other sort algorithm and we specifically want to implement quicksort. In a previous post we passed an instrumented IComparer along with the list to the sorted to the Sort method. This time we’ll try something different. We’ll wrap the objects being sorted so that we can count the number of comparisons performed during the sort. This has the advantage of working with any Sort implementation that can sort IComparable elements and with lists that contain duplicate elements.

public class ComparableWrapper<T> : IComparable
    where T : IComparable
{
    private readonly T _value;
    private readonly Action _action;

    public ComparableWrapper(T value, Action action)
    {
        _value = value;
        _action = action;
    }

    public T Value
    {
        get { return _value; }
    }

    public int CompareTo(object obj)
    {
        var otherWrapper = obj as ComparableWrapper<T>;
        if (otherWrapper == null)
        {
            throw new ArgumentException("Object is not a ComparableWrapper");
        }
        _action();
        return _value.CompareTo(otherWrapper._value);
    }
}

update the tests to use the wrapper

    private List<ComparableWrapper<char>> _input;
    private int _comparisonCount;

    private void IncrementCount()
    {
        _comparisonCount++;
    }
	
	[SetUp]
    public void BeforeEachTest()
    {
        _comparisonCount = 0;
        _input = null;
        _exception = null;
    }
	
    private void with_an_empty_list()
    {
        _input = new List<ComparableWrapper<char>>();
    }	
	
    private void Add(char value)
    {
        _input.Add(new ComparableWrapper<char>(value, IncrementCount));
    }

    private void that_has_element__A()
    {
        Add('A');
    }

    private void that_has_element__B()
    {
        Add('B');
    }

    private void the_element_of_the_list_should_be__A()
    {
        _input.Select(x => x.Value)
              .ShouldContainAllInOrder(new[] {'A'});
    }
	
    private void the_elements_of_the_sorted_list_should_be__A_B__in_that_order()
    {
        _input.Select(x => x.Value)
          .ShouldContainAllInOrder("AB".ToArray());
    }	

and add to the tests to check the comparison count.

    [Test]
    public void Given_a_list_with_elements__A_B()
    {
        Test.Verify(
            with_a_list,
            that_starts_with_element__A,
            followed_by_element__B,
            when_asked_to_sort_the_input,
            should_not_throw_an_Exception,
            the_elements_of_the_sorted_list_should_be__A_B__in_that_order,
            should_perform_at_most_1_comparison
            );
    }

    private void should_perform_at_most_1_comparison()
    {
        _comparisonCount.ShouldBeLessThanOrEqualTo(1);
    }
	
    [Test]
    public void Given_a_list_with_elements__B_A()
    {
        Test.Verify(
            with_a_list,
            that_starts_with_element__B,
            followed_by_element__A,
            when_asked_to_sort_the_input,
            should_not_throw_an_Exception,
            the_elements_of_the_sorted_list_should_be__A_B__in_that_order,
            should_perform_at_most_1_comparison
            );
    }	

Step 5a – test sorting list { C B A }

    [Test]
    public void Given_a_list_with_elements__C_B_A()
    {
        Test.Verify(
            with_a_list,
            that_starts_with_element__C,
            followed_by_element__B,
            followed_by_element__A,
            when_asked_to_sort_the_input,
            should_not_throw_an_Exception,
            the_elements_of_the_sorted_list_should_be__A_B_C__in_that_order,
            should_perform_at_most_3_comparisons
            );
    }

    private void that_starts_with_element__C()
    {
        that_has_element__C();
    }

    private void that_has_element__C()
    {
        Add('C');
    }

    private void the_elements_of_the_sorted_list_should_be__A_B_C__in_that_order()
    {
        _input.Select(x => x.Value)
            .ShouldContainAllInOrder("ABC".ToArray());
    }

    private void should_perform_at_most_3_comparisons()
    {
        _comparisonCount.ShouldBeLessThanOrEqualTo(3);
    }

run the test

WITH a list
that starts with element   C - PASSED
followed by element   B - PASSED
followed by element   A - PASSED
WHEN asked to sort the input
SHOULD not throw an  Exception - PASSED
the elements of the sorted list should be   A  B  C  in that order - FAILED

FluentAssert.Exceptions.ShouldBeEqualAssertionException : at offset 0

update the implementation.

    public void Sort<T>(IList<T> input)
        where T : IComparable
    {
        if (input == null)
        {
            throw new ArgumentNullException();
        }
        if (input.Count <= 1)
        {
            return;
        }
        SwapIfOutOfOrder(input, 0, 1);
        if (input.Count == 2)
        {
            return;
        }
        SwapIfOutOfOrder(input, 0, 2);
        SwapIfOutOfOrder(input, 1, 2);
    }

Step 5b – test sorting all permutations of list { C B A }

To make sure the code works for all permutations of the list we’ll make use of the Permute extension and FormatForDisplay extension built in previous postings.

    [Test]
    public void Should_be_able_to_sort_all_permutations_of__C_B_A__with_at_most_3_comparisons_each()
    {
        with_a_list();
        that_starts_with_element__C();
        followed_by_element__B();
        followed_by_element__A();

        foreach (var permutation in _input.Permute(3))
        {
            _comparisonCount = 0;
            _input = permutation.ToList();
            try
            {
                when_asked_to_sort_the_input();
                the_elements_of_the_sorted_list_should_be__A_B_C__in_that_order();
                should_perform_at_most_3_comparisons();
            }
            catch (Exception e)
            {
                Console.WriteLine("failed on permutation " + permutation.Select(x => x.Value).FormatForDisplay());
                throw;
            }
        }
    }

run the test and get an unexpected failure

failed on permutation { C A B }

FluentAssert.Exceptions.ShouldBeEqualAssertionException : at offset 0

due to an assumption made about the values returned by CompareTo. Fix it:

    private static void SwapIfOutOfOrder<T>(IList<T> input, int first, int second)
        where T : IComparable
    {
        if (input[first].CompareTo(input[second]) > 0)
        {
            var temp = input[first];
            input[first] = input[second];
            input[second] = temp;
        }
    }

Step 6a – test sorting list { D C A B }

    [Test]
    public void Given_a_list_with_elements__D_C_A_B()
    {
        Test.Verify(
            with_a_list,
            that_starts_with_element__D,
            followed_by_element__C,
            followed_by_element__A,
            followed_by_element__B,
            when_asked_to_sort_the_input,
            should_not_throw_an_Exception,
            the_elements_of_the_sorted_list_should_be__A_B_C_D__in_that_order,
            should_perform_at_most_6_comparisons
            );
    }
	
	private void that_starts_with_element__D()
    {
        that_has_element__D();
    }
	
    private void that_has_element__D()
    {
        Add('D');
    }

	private void followed_by_element__C()
    {
        that_has_element__C();
    }
	
    private void the_elements_of_the_sorted_list_should_be__A_B_C_D__in_that_order()
    {
        _input.Select(x => x.Value)
            .ShouldContainAllInOrder("ABCD".ToArray());
    }

    private void should_perform_at_most_6_comparisons()
    {
        _comparisonCount.ShouldBeLessThanOrEqualTo(6);
    }	

run the test

WITH a list
that starts with element   D - PASSED
followed by element   C - PASSED
followed by element   A - PASSED
followed by element   B - PASSED
WHEN asked to sort the input
SHOULD not throw an  Exception - PASSED
the elements of the sorted list should be   A  B  C  D  in that order - FAILED

FluentAssert.Exceptions.ShouldBeEqualAssertionException : at offset 0

update the implementation to use bubble sort.

    public void Sort<T>(IList<T> input) where T : IComparable
    {
        if (input == null)
        {
            throw new ArgumentNullException();
        }
        for (int outer = input.Count; outer >= 1; outer--)
        {
            for (int inner = 0; inner < outer - 1; inner++)
            {
                SwapIfOutOfOrder(input, inner, inner + 1);
            }
        }
    }

Step 6b – test sorting all permutations of list { D C A B } with at most 6 comparisons each

    [Test]
    public void Should_be_able_to_sort_all_permutations_of__D_C_A_B__with_at_most_6_comparisons_each()
    {
        with_a_list();
        that_starts_with_element__D();
        followed_by_element__C();
        followed_by_element__A();
        followed_by_element__B();

        foreach (var permutation in _input.Permute(4))
        {
            _comparisonCount = 0;
            _input = permutation.ToList();
            try
            {
                when_asked_to_sort_the_input();
                the_elements_of_the_sorted_list_should_be__A_B_C_D__in_that_order();
                should_perform_at_most_6_comparisons();
            }
            catch (Exception e)
            {
                Console.WriteLine("failed on permutation " + permutation.Select(x => x.Value).FormatForDisplay());
                throw;
            }
        }
    }

test passes.

Step 6c – test sorting all permutations of list { D C A B } with less than 6 comparisons each on average

This is the where we force the implementation away from bubble sort. Bubble sort always uses 6 comparisons to sort 4 elements but quicksort only needs 6 in the worst case.

    [Test]
    public void Should_be_able_to_sort_all_permutations_of__D_C_A_B__with_less_than_6_comparisons_each_on_average()
    {
        with_a_list();
        that_starts_with_element__D();
        followed_by_element__C();
        followed_by_element__A();
        followed_by_element__B();

        int totalComparisons = 0;
        int totalPermutations = 0;
        foreach (var permutation in _input.Permute(4))
        {
            totalPermutations++;
            _comparisonCount = 0;
            _input = permutation.ToList();
            try
            {
                when_asked_to_sort_the_input();
                the_elements_of_the_sorted_list_should_be__A_B_C_D__in_that_order();
                should_perform_at_most_6_comparisons();
                totalComparisons += _comparisonCount;
            }
            catch (Exception e)
            {
                Console.WriteLine("failed on permutation " + permutation.Select(x => x.Value).FormatForDisplay());
                throw;
            }
        }

        var average = totalComparisons / (double)totalPermutations;
        average.ShouldBeLessThan(6, () => "Incorrect average number of comparisons - " + average);
    }

run the test

FluentAssert.Exceptions.ShouldBeLessThanAssertionException : Incorrect average number of comparisons - 6

update the implementation using the wikipedia algorithm as a guide.

    public void Sort<T>(IList<T> input)
        where T : IComparable
    {
        if (input == null)
        {
            throw new ArgumentNullException();
        }

        var segments = new Stack<int[]>();
        segments.Push(new[] { 0, input.Count - 1 });
        while (segments.Any())
        {
            var segment = segments.Pop();
            int first = segment[0];
            int last = segment[1];
            int middle = GetMiddle(first, last);
            while (first < last)
            {
                while (first < middle && AreInCorrectOrder(input, first, middle))
                {
                    first++;
                }
                while (last > middle && AreInCorrectOrder(input, middle, last))
                {
                    last--;
                }
                if (first < last)
                {
                    Swap(input, first, last);
                    first++;
                    last--;
                    if (first - 1 == middle)
                    {
                        middle = ++last;
                    }
                    else if (last + 1 == middle)
                    {
                        middle = --first;
                    }
                }
            }
            if (middle - segment[0] > 1)
            {
                segments.Push(new[] { segment[0], middle - 1 });
            }
            if (segment[1] - middle > 1)
            {
                segments.Push(new[] { middle + 1, segment[1] });
            }
        }
    }

    private static int GetMiddle(int first, int last)
    {
        return first + ((last - first) >> 1);
    }
	
    private static bool AreInCorrectOrder<T>(IList<T> input, int first, int second)
        where T : IComparable
    {
        return input[first].CompareTo(input[second]) <= 0;
    }	

    private static void Swap<T>(IList<T> input, int first, int second)
    {
        var temp = input[first];
        input[first] = input[second];
        input[second] = temp;
    }

Step 7 – make sure the implementation can handle all permutations of list { A B C D E F G H }

    [Test]
    public void Should_be_able_to_sort_all_permutations_of__A_B_C_D_E_F_G_H()
    {
        with_a_list();
        "ABCDEFGH".ToList().ForEach(Add);

        foreach (var permutation in _input.Permute(8))
        {
            _comparisonCount = 0;
            _input = permutation.ToList();
            try
            {
                when_asked_to_sort_the_input();
                the_elements_of_the_sorted_list_should_be__A_B_C_D_E_F_G_H_in_that_order();
            }
            catch (Exception e)
            {
                Console.WriteLine("failed on permutation " + permutation.Select(x => x.Value).FormatForDisplay());
                throw;
            }
        }
    }

    private void the_elements_of_the_sorted_list_should_be__A_B_C_D_E_F_G_H_in_that_order()
    {
        _input.Select(x => x.Value)
            .ShouldContainAllInOrder("ABCDEFGH".ToArray());
    }

test passes.

Final – how does this implementation compare to List.Sort?

First get the number of comparisons our Sort uses across all permutations of { A B C D E F G H }

    var input = "ABCDEFGH"
        .Select(x=>new ComparableWrapper<char>(x, IncrementCount))
        .ToList();

    foreach (var permutation in input.Permute(8))
    {
        Sort(permutation);
    }
    Console.WriteLine("total comparisons: "+_comparisonCount);

output

total comparisons: 682272

Now get the number of comparisons List.Sort uses for the same set of inputs.

    var input = "ABCDEFGH"
        .Select(x=>new ComparableWrapper<char>(x, IncrementCount))
        .ToList();

    foreach (var permutation in input.Permute(8))
    {
        permutation.ToList().Sort();
    }
    Console.WriteLine("total comparisons: "+_comparisonCount);

output

total comparisons: 1209888

whoa! Our no-frills implementation of quicksort uses only 56% of the comparisons that List.Sort requires to achieve the same goal!

682272 / 1209888 = 0.563913354

It kind of makes you wonder: which sort algorithm does List.Sort use?

Read Full Post »

Older Posts »

%d bloggers like this: