Benchmarking C++ container iteration

In this stackoverflow question I made a point that iterating over a std::vector and a std::list would not be of any noticeable difference.

After someone said that it is indeed noticeable, I decided to make a small benchmark to try it out.

[Note: This post has been edited with a new conclusion.]

Skip to the results.

Skip to conclusion.

Skip to new conclusion.

This was the program I used for the benchmark:

#include <iostream>
#include <iomanip>
#include <vector>
#include <deque>
#include <list>
#include <random>
#include <limits>
#include <chrono>

template<typename Tc>
void generate(Tc& container)
{
	std::random_device rd;
	std::mt19937 gen(rd());
	std::uniform_int_distribution<> dis(0);

	for (int i = 0; i < 10000000; i++)
		container.emplace_back(dis(gen));
}

template<typename Tc>
void iterate(Tc& container)
{
	for (const auto& v __attribute__((unused)) : container)
	{
	}
}

template<typename Tc>
uint64_t time(Tc& container, void (*function)(Tc&))
{
	auto start = std::chrono::system_clock::now();
	function(container);
	auto end = std::chrono::system_clock::now();

 	return std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
}

template<typename Tc>
void test(const std::string& type, const int times)
{
	uint64_t gentot = 0;
	uint64_t itrtot = 0;

	std::vector<uint64_t> gen;
	std::vector<uint64_t> itr;


	for (int i = 0; i < times; i++)
	{
		Tc c;

		gen.emplace_back(time(c, generate));
		itr.emplace_back(time(c, iterate ));

		gentot += gen.back();
		itrtot += itr.back();
	}

	std::sort(std::begin(gen), std::end(gen));
	std::sort(std::begin(itr), std::end(itr));

	std::cout << "Result for " << type << ":\n";
	std::cout << "    Generate: Total   " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << (gentot / 1000.0) << " seconds\n";
	std::cout << "              Avarage " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << ((gentot / times) / 1000.0) << " seconds\n";
	std::cout << "              Medium  " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << (gen[times / 2] / 1000.0) << " seconds\n";
	std::cout << "    Iterate : Total   " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << (itrtot / 1000.0) << " seconds\n";
	std::cout << "              Avarage " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << ((itrtot / times) / 1000.0) << " seconds\n";
	std::cout << "              Medium  " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << (itr[times / 2] / 1000.0) << " seconds\n";
}

int main()
{
	test<std::vector<int>>("vector", 10);
	test<std::deque <int>>("deque" , 10);
	test<std::list  <int>>("list"  , 10);
}

Now for the result of the above program, with 10000000 entries in each container and 10 tests of each container, it was this on my machine:

Result for vector:
    Generate: Total   11.701 seconds
              Avarage 1.170 seconds
              Medium  1.171 seconds
    Iterate : Total   0.373 seconds
              Avarage 0.037 seconds
              Medium  0.037 seconds
Result for deque:
    Generate: Total   12.304 seconds
              Avarage 1.230 seconds
              Medium  1.233 seconds
    Iterate : Total   0.375 seconds
              Avarage 0.037 seconds
              Medium  0.038 seconds
Result for list:
    Generate: Total   14.822 seconds
              Avarage 1.482 seconds
              Medium  1.467 seconds
    Iterate : Total   0.470 seconds
              Avarage 0.047 seconds
              Medium  0.047 seconds

As seen, for ten million entries it is indeed slower to iterate over a list than over a vector or a deque. A whole one hundredth of a second! So while my opponent is right in that it's slower to iterate over a list, I stand by original comment stating that it's not noticeable.

Miscellaneous information

My computer have a Intel Core i7 processor with six dual-thread cores, model 3930K running at 3.20Ghz, and I have 32GB RAM. The test, as seen above, is not utilizing thread.

I used the following command line to compile the program:

clang++ -Wall -Wextra -Weffc++ -Wpedantic -pipe -g3 -pthread -std=c++11 -stdlib=libc++ -lc++abi iter.cpp

No special optimizations was enabled.


Edit:

After updating the above program to use a structure instead of a single int, the result changed quite a bit:

Result for vector:
    Generate: Total   14.447 seconds
              Avarage 1.444 seconds
              Medium  1.439 seconds
    Iterate : Total   0.360 seconds
              Avarage 0.036 seconds
              Medium  0.036 seconds
Result for deque:
    Generate: Total   9.390 seconds
              Avarage 0.939 seconds
              Medium  0.941 seconds
    Iterate : Total   1.243 seconds
              Avarage 0.124 seconds
              Medium  0.124 seconds
Result for list:
    Generate: Total   9.777 seconds
              Avarage 0.977 seconds
              Medium  0.965 seconds
    Iterate : Total   0.873 seconds
              Avarage 0.087 seconds
              Medium  0.088 seconds

As seen the iteration for a vector haven't changed much, but the times for the list have, and especially for the deque. The time to iterate over the list almost doubled, but the time to iterate over the deque almost quadrupled!

My guess is that it has something to do with the caching, and I have to look closer using tools such as cachegrind.

Here is the updated source code:

#include <iostream>
#include <iomanip>
#include <vector>
#include <deque>
#include <list>
#include <random>
#include <limits>
#include <chrono>

struct S
{
	int         i;
	double      d;
	std::string s;
};

template<typename Tc>
void generate(Tc& container)
{
	for (int i = 0; i < 10000000; i++)
		container.push_back(typename Tc::value_type());
}

template<typename Tc>
void iterate(Tc& container)
{
	for (const auto& v __attribute__((unused)) : container)
	{
	}
}

template<typename Tc>
uint64_t time(Tc& container, void (*function)(Tc&))
{
	auto start = std::chrono::system_clock::now();
	function(container);
	auto end = std::chrono::system_clock::now();

 	return std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
}

template<typename Tc>
void test(const std::string& type, const int times)
{
	uint64_t gentot = 0;
	uint64_t itrtot = 0;

	std::vector<uint64_t> gen;
	std::vector<uint64_t> itr;


	for (int i = 0; i < times; i++)
	{
		Tc c;

		gen.emplace_back(time(c, generate));
		itr.emplace_back(time(c, iterate ));

		gentot += gen.back();
		itrtot += itr.back();
	}

	std::sort(std::begin(gen), std::end(gen));
	std::sort(std::begin(itr), std::end(itr));

	std::cout << "Result for " << type << ":\n";
	std::cout << "    Generate: Total   " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << (gentot / 1000.0) << " seconds\n";
	std::cout << "              Avarage " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << ((gentot / times) / 1000.0) << " seconds\n";
	std::cout << "              Medium  " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << (gen[times / 2] / 1000.0) << " seconds\n";
	std::cout << "    Iterate : Total   " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << (itrtot / 1000.0) << " seconds\n";
	std::cout << "              Avarage " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << ((itrtot / times) / 1000.0) << " seconds\n";
	std::cout << "              Medium  " << std::left << std::setw(5) << std::setprecision(3) << std::fixed << (itr[times / 2] / 1000.0) << " seconds\n";
}

int main()
{
	test<std::vector<S>>("vector", 10);
	test<std::deque <S>>("deque" , 10);
	test<std::list  <S>>("list"  , 10);
}

 

Leave a comment