@comment{
  Banerjee and Peng, ECML 2002 conference paper (Springer LNCS series).
  The citation key is the exporter-generated hash and is kept unchanged so
  existing citations keep resolving.
  The conference name lives in booktitle; the conference dates are kept in
  eventdate (ISO range) and the publisher copyright line in copyright, both of
  which classic BibTeX styles ignore rather than print.
  TODO: the LNCS volume number is not recorded in this entry; add a volume
  field once confirmed against the publisher record.
}
@inproceedings{35b265c525b844c38715d9e932c118e8,
  author    = {Banerjee, Bikramjit and Peng, Jing},
  title     = {Convergent Gradient Ascent in General-Sum Games},
  booktitle = {Machine Learning: {ECML} 2002, 13th {European} Conference on Machine Learning},
  editor    = {Elomaa, Tapio and Mannila, Heikki and Toivonen, Hannu},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer-Verlag},
  year      = {2002},
  pages     = {1--9},
  doi       = {10.1007/3-540-36755-1_1},
  isbn      = {9783540440369},
  language  = {English},
  eventdate = {2002-08-19/2002-08-23},
  copyright = {{\textcopyright} Springer-Verlag Berlin Heidelberg 2002},
  abstract  = {In this work we look at the recent results in policy gradient learning in a general-sum game scenario, in the form of two algorithms, IGA and WoLF-IGA. We address the drawbacks in convergence properties of these algorithms, and propose a more accurate version of WoLF-IGA that is guaranteed to converge to Nash Equilibrium policies in self-play (or against an IGA learner). We also present a control theoretic interpretation of variable learning rate which not only justifies WoLF-IGA, but also shows it to achieve fastest convergence under some constraints. Finally we derive optimal learning rates for fastest convergence in practical simulations.},
}