<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD Journal Publishing DTD v3.0 20080202//EN" "http://dtd.nlm.nih.gov/publishing/3.0/journalpublishing3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="3.0" xml:lang="en">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">plos</journal-id>
      <journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id>
      <journal-id journal-id-type="pmc">ploscomp</journal-id>
      <journal-title-group>
        <journal-title>PLoS Computational Biology</journal-title>
      </journal-title-group>
      <issn pub-type="ppub">1553-734X</issn>
      <issn pub-type="epub">1553-7358</issn>
      <publisher>
        <publisher-name>Public Library of Science</publisher-name>
        <publisher-loc>San Francisco, USA</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">PCOMPBIOL-D-12-00875</article-id>
      <article-id pub-id-type="doi">10.1371/journal.pcbi.1002922</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Article</subject>
        </subj-group>
        <subj-group subj-group-type="Discipline-v2">
          <subject>Biology</subject>
          <subj-group>
            <subject>Neuroscience</subject>
            <subj-group>
              <subject>Computational neuroscience</subject>
              <subj-group>
                <subject>Coding mechanisms</subject>
                <subject>Sensory systems</subject>
              </subj-group>
            </subj-group>
            <subj-group>
              <subject>Sensory systems</subject>
              <subj-group>
                <subject>Visual system</subject>
              </subj-group>
            </subj-group>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="Discipline-v2">
          <subject>Physics</subject>
          <subj-group>
            <subject>Biophysics</subject>
            <subj-group>
              <subject>Biophysics theory</subject>
            </subj-group>
          </subj-group>
          <subj-group>
            <subject>Statistical mechanics</subject>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="Discipline">
          <subject>Neuroscience</subject>
          <subject>Physics</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Stimulus-dependent Maximum Entropy Models of Neural Population Codes</article-title>
        <alt-title alt-title-type="running-head">Stimulus-dependent Maxent Models for Neural Codes</alt-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
          <name name-style="western">
            <surname>Granot-Atedgi</surname>
            <given-names>Einat</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
          <name name-style="western">
            <surname>Tkačik</surname>
            <given-names>Gašper</given-names>
          </name>
          <xref ref-type="aff" rid="aff2">
            <sup>2</sup>
          </xref>
          <xref ref-type="corresp" rid="cor1">
            <sup>*</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Segev</surname>
            <given-names>Ronen</given-names>
          </name>
          <xref ref-type="aff" rid="aff3">
            <sup>3</sup>
          </xref>
          <xref ref-type="fn" rid="fn1">
            <sup>¶</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Schneidman</surname>
            <given-names>Elad</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
          <xref ref-type="fn" rid="fn1">
            <sup>¶</sup>
          </xref>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <addr-line>Department of Neurobiology, Weizmann Institute of Science, Rehovot, Israel</addr-line>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <addr-line>Institute of Science and Technology Austria, Am Campus 1, Klosterneuburg, Austria</addr-line>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <addr-line>Faculty of Natural Sciences, Department of Life Sciences and Zlotowski Center for Neuroscience, Ben Gurion University of the Negev, Be'er Sheva, Israel</addr-line>
      </aff>
      <contrib-group>
        <contrib contrib-type="editor" xlink:type="simple">
          <name name-style="western">
            <surname>Sporns</surname>
            <given-names>Olaf</given-names>
          </name>
          <role>Editor</role>
          <xref ref-type="aff" rid="edit1"/>
        </contrib>
      </contrib-group>
      <aff id="edit1">
        <addr-line>Indiana University, United States of America</addr-line>
      </aff>
      <author-notes>
        <corresp id="cor1">* E-mail: <email xlink:type="simple">gtkacik@ist.ac.at</email></corresp>
        <fn fn-type="conflict">
          <p>The authors have declared that no competing interests exist.</p>
        </fn>
        <fn fn-type="con">
          <p>Conceived and designed the experiments: EGA GT RS ES. Performed the experiments: EGA RS. Analyzed the data: EGA GT RS ES. Wrote the paper: GT RS ES.</p>
        </fn>
        <fn id="fn1" fn-type="other">
          <p>¶ RS and ES also contributed equally to this work.</p>
        </fn>
      </author-notes>
      <pub-date pub-type="collection">
        <month>3</month>
        <year>2013</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>14</day>
        <month>3</month>
        <year>2013</year>
      </pub-date>
      <volume>9</volume>
      <issue>3</issue>
      <elocation-id>e1002922</elocation-id>
      <history>
        <date date-type="received">
          <day>30</day>
          <month>5</month>
          <year>2012</year>
        </date>
        <date date-type="accepted">
          <day>28</day>
          <month>12</month>
          <year>2012</year>
        </date>
      </history>
      <permissions>
        <copyright-year>2013</copyright-year>
        <copyright-holder>Granot-Atedgi et al</copyright-holder>
        <license xlink:type="simple">
          <license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
        </license>
      </permissions>
      <abstract>
        <p>Neural populations encode information about their stimulus in a collective fashion, by joint activity patterns of spiking and silence. A full account of this mapping from stimulus to neural activity is given by the conditional probability distribution over neural codewords given the sensory input. For large populations, direct sampling of these distributions is impossible, and so we must rely on constructing appropriate models. We show here that in a population of 100 retinal ganglion cells in the salamander retina responding to temporal white-noise stimuli, dependencies between cells play an important encoding role. We introduce the stimulus-dependent maximum entropy (SDME) model—a minimal extension of the canonical linear-nonlinear model of a single neuron, to a pairwise-coupled neural population. We find that the SDME model gives a more accurate account of single cell responses and in particular significantly outperforms uncoupled models in reproducing the distributions of population codewords emitted in response to a stimulus. We show how the SDME model, in conjunction with static maximum entropy models of population vocabulary, can be used to estimate information-theoretic quantities like average surprise and information transmission in a neural population.</p>
      </abstract>
      <abstract abstract-type="summary">
        <title>Author Summary</title>
        <p>In the sensory periphery, stimuli are represented by patterns of spikes and silences across a population of sensory neurons. Because the neurons form an interconnected network, the code cannot be understood by looking at single cells alone. Recent recordings in the retina have enabled us to study populations of a hundred or more neurons that carry the visual information into the brain, and thus build probabilistic models of the neural code. Here we present a minimal (maximum entropy) yet powerful extension of well-known linear/nonlinear models for independent neurons, to an interacting population. This model reproduces the behavior of single cells as well as the structure of correlations in neural spiking. Our model predicts much better the complete set of patterns of spiking and silence across a population of cells, allowing us to explore the properties of the stimulus-response mapping, and estimate the information transmission, in bits per second, that the population carries about the stimulus. Our results show that to understand the code, we need to shift our focus from reproducing single-cell properties (such as firing rates) towards understanding the total “vocabulary” of patterns emitted by the population, and that network correlations play a central role in shaping the code of large neural populations.</p>
      </abstract>
      <funding-group>
        <funding-statement>This work was supported by The Israel Science Foundation and the Human Frontier Science Program. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
      </funding-group>
      <counts>
        <page-count count="14"/>
      </counts>
    </article-meta>
  </front>
  <body>
    <sec id="s1">
      <title>Introduction</title>
      <p>Neurons represent and transmit information using temporal sequences of short stereotyped bursts of electrical activity, or spikes <xref ref-type="bibr" rid="pcbi.1002922-Rieke1">[1]</xref>. Much of what we know about this encoding has been learned by studying the mapping between stimuli and responses at the level of single neurons, and building detailed models of what stimulus features drive a single neuron to spike <xref ref-type="bibr" rid="pcbi.1002922-AgerayArcas1">[2]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Schwartz1">[4]</xref>. In most of the nervous system, however, information is represented by joint activity patterns of spiking and silence over populations of cells. In a sensory context, these patterns can be thought of as codewords that convey information about external stimuli to the central nervous system. One of the challenges of neuroscience is to understand the neural <italic>codebook</italic>—a map from the stimuli to the neural codewords—a task made difficult by the fact that neurons respond to the stimulus neither deterministically nor independently.</p>
      <p>The structure of correlations among the neurons determines the organization of the code, that is, how different stimuli are represented by the population activity <xref ref-type="bibr" rid="pcbi.1002922-Stopfer1">[5]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Averbeck1">[8]</xref>. These correlations also determine what the brain, having no access to the stimulus apart from the spikes coming from the sensory periphery, can learn about the outside world <xref ref-type="bibr" rid="pcbi.1002922-Brunel1">[9]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Sompolinsky1">[11]</xref>. The source of these correlations, which arise either from the correlated external stimuli to the neurons, from “shared” local input from other neurons, or from “private” independent noise, has been heavily debated <xref ref-type="bibr" rid="pcbi.1002922-Schneidman1">[12]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Averbeck2">[15]</xref>. In many neural systems, the correlation between pairs of (even nearby or functionally similar) neurons was found to be weak <xref ref-type="bibr" rid="pcbi.1002922-Bair1">[16]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Schneidman2">[18]</xref>. Similarly, the redundancy between pairs in terms of the information they convey about their stimuli was also typically weak <xref ref-type="bibr" rid="pcbi.1002922-Puchalla1">[19]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Chechik1">[21]</xref>. The low correlations and redundancies between pairs of neurons therefore led to the suggestion that neurons in larger populations might encode information independently <xref ref-type="bibr" rid="pcbi.1002922-Nirenberg2">[22]</xref>, which was echoed by theoretical ideas of maximally efficient neural codes <xref ref-type="bibr" rid="pcbi.1002922-Barlow1">[23]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Barlow2">[25]</xref>.</p>
      <p>Recent studies of the neural code in large populations have, however, revealed that while the typical pairwise correlations may be weak, larger populations of neurons can nevertheless be strongly correlated as a whole <xref ref-type="bibr" rid="pcbi.1002922-Schneidman2">[18]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Schnitzer1">[26]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Ganmor1">[33]</xref>. Maximum entropy models of neural populations have shown that such strong network correlations can be the result of collective effects of pairwise dependencies between cells, and, in some cases, of sparse high-order dependencies <xref ref-type="bibr" rid="pcbi.1002922-Schneidman2">[18]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Ohiorhenuan1">[34]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Tkaik3">[36]</xref>. Most of these studies have characterized the strength of network effects and spiking synchrony at the level of the total <italic>vocabulary</italic> of the population, i.e. the distribution of codewords averaged over all the stimuli. It is not immediately clear how these findings affect stimulus encoding, where one needs to distinguish the impact of correlated stimuli that the cells receive (“stimulus correlations”), from the impact of co-variance of the cells conditional on the stimulus (“noise correlations”). For small populations of neurons, it has been shown that taking into account correlations for decoding or reconstructing the stimulus can be beneficial compared to the case where correlations are neglected (e.g. <xref ref-type="bibr" rid="pcbi.1002922-Ganmor2">[35]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Warland1">[37]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Brown1">[40]</xref>). 
Similarly, generalized linear models highlighted the importance of dependencies between cells in accounting for correlations between pairs and triplets of retinal ganglion cell responses <xref ref-type="bibr" rid="pcbi.1002922-Pillow1">[41]</xref>.</p>
      <p>Here we present a new encoding model that allows us to study in fine detail the codebook of a large neural population. We define the <italic>codewords</italic> to be the joint activity patterns of the population in time windows whose duration reflects the typical width of the cross-correlation of spiking between pairs of neurons. Importantly, this model gives a joint probability distribution over the activity patterns of the whole population for a given stimulus, while capturing both the stimulus and noise correlations. This new model belongs to a class of maximum entropy models with strong links to statistical physics <xref ref-type="bibr" rid="pcbi.1002922-Tkaik1">[27]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Schneidman3">[42]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Sessak1">[53]</xref> and is directly related to maximum entropy models of neural vocabulary <xref ref-type="bibr" rid="pcbi.1002922-Schneidman2">[18]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Tkaik1">[27]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Tkaik2">[32]</xref>, allowing us to estimate the entropy and its derivative quantities for the neural code. In sum, the maximum entropy framework enables us to progress towards our goal of focusing attention on the level of joint patterns of activity, rather than capturing low-level statistics (e.g., the individual firing rates) of the neural code alone.</p>
      <p>We start by showing that linear-nonlinear (LN) models of retinal ganglion cells responding to spatially unstructured stimuli capture a significant part of the single neuron response, but still miss much of the detail; in particular, we show that they fail to capture the correlation structure of firing among the cells. We next present our new <italic>stimulus-dependent maximum entropy</italic> (SDME) model, which is a hybrid between linear-nonlinear models for single cells and the pairwise maximum entropy models. Applied to groups of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e001" xlink:type="simple"/></inline-formula> neurons recorded simultaneously, we find that SDME models outperform the LN models for the stimulus-response mapping of single cells and, crucially, give a significantly better account of the distribution of codewords in the neural population.</p>
    </sec>
    <sec id="s2">
      <title>Results</title>
      <p>We recorded the simultaneous spiking activity of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e002" xlink:type="simple"/></inline-formula> ganglion cells from the salamander retina <xref ref-type="bibr" rid="pcbi.1002922-Segev1">[54]</xref>, presented with repeats of a <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e003" xlink:type="simple"/></inline-formula> long full-field flicker (“Gaussian FFF”) movie, where the light intensity on the screen was sampled independently from a Gaussian distribution with a frequency of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e004" xlink:type="simple"/></inline-formula> (<xref ref-type="fig" rid="pcbi-1002922-g001">Fig. 1a</xref>). This “frozen noise” stimulus was repeated 726 times, for a total of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e005" xlink:type="simple"/></inline-formula> of stimulation. Most of the recorded cells exhibited temporal OFF-like behaviors (<xref ref-type="fig" rid="pcbi-1002922-g001">Fig. 1b</xref>). We chose for further analysis <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e006" xlink:type="simple"/></inline-formula> cells that were reliably sorted, demonstrated a robust and stable response over repeats, and generated at least <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e007" xlink:type="simple"/></inline-formula> spikes during the course of the experiment. We also left out the first 100 repeats of the stimulus, when the retina was still adapting, to ensure stationarity (see <xref ref-type="sec" rid="s4">Methods</xref>). 
To construct the population response codewords, we discretized time into <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e008" xlink:type="simple"/></inline-formula> bins, and represented the activity of the neurons in response to the stimulus as binary patterns in each of the time bins. If neuron <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e009" xlink:type="simple"/></inline-formula> was active in time bin <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e010" xlink:type="simple"/></inline-formula>, we denoted a spike (or more spikes) as <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e011" xlink:type="simple"/></inline-formula>, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e012" xlink:type="simple"/></inline-formula> if it was silent. In this representation, the whole experiment yielded a total of about <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e013" xlink:type="simple"/></inline-formula> 100-bit samples. Maximum entropy models are defined by a choice of constrained statistics over the ensemble of codewords and stimuli, as we discuss below; our ability to estimate these reliably from data is thus a key systematic issue, which we address in the <xref ref-type="sec" rid="s4">Methods</xref> section.</p>
      <fig id="pcbi-1002922-g001" position="float">
        <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g001</object-id>
        <label>Figure 1</label>
        <caption>
          <title>Response of a large population of ganglion cells to a 10 s long repeated visual stimulus.</title>
          <p>(<bold>a</bold>) White noise uncorrelated Gaussian stimulus presented at <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e014" xlink:type="simple"/></inline-formula> and the spiking patterns of 3 cells to repeated presentations of the stimulus. (<bold>b</bold>) Spike-triggered averages of 110 simultaneously recorded cells; a subset of 100 cells was chosen for further analysis. (<bold>c</bold>) The histogram of pairwise correlation coefficients between cells for repeated Gaussian white noise stimulus (green). For comparison, the statistics of the response on repeated natural pixel movie (red), and non-repeated natural pixel movie (blue) is also shown, as documented in Ref. <xref ref-type="bibr" rid="pcbi.1002922-Ganmor2">[35]</xref>. The significance cutoff for correlation coefficients is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e015" xlink:type="simple"/></inline-formula>, 95% of correlations are above this cut (see <xref ref-type="sec" rid="s4">Methods</xref>). (<bold>d</bold>) Average pairwise correlation coefficient between cells as a function of the distance (mean and std are across pairs of cells at a given distance).</p>
        </caption>
        <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g001" position="float" xlink:type="simple"/>
      </fig>
      <p>All models of the population responses were fitted based on one half of our data (313 training repeats), and evaluated (tested) on the other half of repeats; overall, the train and test data were each almost 1 hr long. While fitting the stimulus-dependent maximum entropy model can be done using non-repeated stimuli, assessing the performance of the models requires many repeated presentations of the same stimulus to quantify both single cell and in particular population spiking patterns, as well as noise entropy and mutual information. Unlike for single neurons (which are fully characterized by their firing rate), in the case of large populations, capturing well the very high-dimensional distribution of codewords given the stimulus, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e016" xlink:type="simple"/></inline-formula>, is a non-trivial problem, as we show below. Because we were interested in models of codeword distributions, we chose the experimental design that maximizes the number of repeats rather than the duration of the stimulus; consequently, we examined how the models generalize across stimulus repeats rather than across different stimuli. Despite the limited duration of the stimulus segment, the large number of repeats nevertheless enabled us to recover smooth estimates of the linear filters (<xref ref-type="fig" rid="pcbi-1002922-g001">Fig. 1b</xref>). Furthermore, because of the way we construct our maximum entropy models, these linear filters are <italic>the same</italic> for all the models considered, so the performance of the models cannot differ due to the differences in modeled stimulus sensitivities. With this setup, we are therefore able to fairly compare the performance and generalization of various models of joint population activity given the stimulus.</p>
      <sec id="s2a">
        <title>Conditionally independent Linear-Nonlinear models for a neural population</title>
        <p>Using repeated presentations of the same movie, we estimated the average response of each of the cells across repeats, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e017" xlink:type="simple"/></inline-formula>, or the peri-stimulus time histogram (PSTH). Following Refs. <xref ref-type="bibr" rid="pcbi.1002922-Schwartz1">[4]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Fairhall1">[55]</xref>, we fitted a linear-nonlinear model for each of the cells in the experiment, so that the resulting model for the population as a whole is a set of uncoupled, conditionally independent LN neurons that we denote together as a ‘S1’ model (the reason for this notation will be explained later). The predicted rate of every neuron is then <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e018" xlink:type="simple"/></inline-formula>, where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e019" xlink:type="simple"/></inline-formula> is a linear filter matched for the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e020" xlink:type="simple"/></inline-formula>-th cell, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e021" xlink:type="simple"/></inline-formula> is its point-wise nonlinear function, and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e022" xlink:type="simple"/></inline-formula> is the stimulus fragment from time <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e023" xlink:type="simple"/></inline-formula> until <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e024" xlink:type="simple"/></inline-formula> (here we used <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e025" xlink:type="simple"/></inline-formula>, making <inline-formula><inline-graphic 
xlink:href="info:doi/10.1371/journal.pcbi.1002922.e026" xlink:type="simple"/></inline-formula> a vector of light intensities with 40 components). Linear filters were reconstructed using reverse correlation (spike-triggered average), and nonlinearities were obtained by histogramming <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e027" xlink:type="simple"/></inline-formula> into <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e028" xlink:type="simple"/></inline-formula> adaptively-sized bins and obtaining <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e029" xlink:type="simple"/></inline-formula> by inverting <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e030" xlink:type="simple"/></inline-formula> using Bayes' rule. These LN models captured most of the structure of the PSTH, yet as the example cell in <xref ref-type="fig" rid="pcbi-1002922-g002">Fig. 2a</xref> shows, they often misestimated the exact firing rates of the neuron, or sometimes even missed parts of the neural response altogether. For the Gaussian FFF, the normalized (Pearson) correlation between the measured and predicted PSTH, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e031" xlink:type="simple"/></inline-formula>, was <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e032" xlink:type="simple"/></inline-formula> (mean <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e033" xlink:type="simple"/></inline-formula> std across 100 cells).</p>
        <fig id="pcbi-1002922-g002" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g002</object-id>
          <label>Figure 2</label>
          <caption>
            <title>Pairwise SDME (S2) model predicts the firing rate of single cells better than conditionally independent LN (S1) models.</title>
            <p>(<bold>a</bold>) Example of the PSTH segment for one cell (green), the best prediction of the S1 model (blue) and of the S2 model (red). (<bold>b</bold>) Correlation coefficient between the true PSTH and S2 model prediction (vertical axis) vs. the correlation between the true PSTH and the S1 model prediction (horizontal axis); each plot symbol is a separate cell, dotted line shows equality. S2 significantly outperforms S1 (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e034" xlink:type="simple"/></inline-formula>, paired two-sided Wilcoxon test). The neuron chosen in panel (a) is shown in orange.</p>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g002" position="float" xlink:type="simple"/>
        </fig>
        <p>The performance gap of the canonical LN models in predicting single neuron responses suggests that either the single-neuron models need to be improved to account for the observed behavior, or that interactions between neurons play an important encoding role and need to be included. Clearly, the firing rate prediction performance can be improved for single neurons by models with higher-dimensional stimulus sensitivity (e.g. <xref ref-type="bibr" rid="pcbi.1002922-Fairhall1">[55]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Tkaik4">[56]</xref>) or dynamical aspects of spiking behavior (e.g. <xref ref-type="bibr" rid="pcbi.1002922-Keat1">[57]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Ozuysal1">[58]</xref>). However, previous work (and <xref ref-type="sec" rid="s2">results</xref> below) demonstrated that even conditionally-independent models which by construction perfectly reproduce the firing rate behavior of single cells, often fail to capture the measured correlation structure of firing between pairs of cells, as well as higher-order statistical structure <xref ref-type="bibr" rid="pcbi.1002922-Schneidman2">[18]</xref>.</p>
        <p>We therefore sought a model of the neural code that would be able to reproduce the correlation structure of population codes. We asked whether a model that combined the LN (receptive-field based) aspect of single cells with the interactions between cells, could give a better account of the neural stimulus-response mapping. Importantly, the new model should capture not only the firing rate of single cells but also accurately predict the full distribution of the joint activity patterns across the whole population. Because the joint distributions of activity are high-dimensional (e.g., the distribution over codewords across the duration of the experiment, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e035" xlink:type="simple"/></inline-formula>, has <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e036" xlink:type="simple"/></inline-formula> components), this is a very demanding benchmark for any model.</p>
      </sec>
      <sec id="s2b">
        <title>A Stimulus Dependent Maximum Entropy model for a neural population</title>
        <p>We propose the simplest extension to the conditionally-independent set of LN models for each cell in the recorded population, by including pairwise couplings between cells, so that the spiking of cell <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e037" xlink:type="simple"/></inline-formula> can increase or decrease the probability of spiking for cell <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e038" xlink:type="simple"/></inline-formula> <xref ref-type="bibr" rid="pcbi.1002922-Tkaik5">[59]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-GranotAtdegi1">[60]</xref>. Importantly, in contrast to previous models, we introduce this coupling so that the resulting model is a maximum-entropy model for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e039" xlink:type="simple"/></inline-formula>, the conditional distribution over population activity patterns given the stimulus. We recall that the maximum entropy models give the most parsimonious probabilistic description of the joint activity patterns, which perfectly reproduces a chosen set of measured statistics over these patterns, without making any additional assumptions <xref ref-type="bibr" rid="pcbi.1002922-Jaynes1">[61]</xref>.</p>
        <p>Specifically, we construct a model that relies only on the measured overall correlations between pairs of neurons, which can be reliably estimated from experimental data (see <xref ref-type="sec" rid="s4">Methods</xref>). We find that (i) the pairwise correlations between cells in response to the Gaussian FFF movie are typically weak but significantly different from zero (<xref ref-type="fig" rid="pcbi-1002922-g001">Fig. 1c</xref>, consistent with previous reports <xref ref-type="bibr" rid="pcbi.1002922-Schneidman2">[18]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Tkaik1">[27]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Tkaik2">[32]</xref>); (ii) the correlation in neural activities shows a fast decay with distance despite the infinite correlation length of the stimulus, but the decay does not reach zero correlation even at relatively large distances (<xref ref-type="fig" rid="pcbi-1002922-g001">Fig. 1d</xref>). This salient structure, along with any other potential statistical correlation at the pairwise order, is characterized by the covariance matrix of activities, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e040" xlink:type="simple"/></inline-formula>, where the averages are taken across time and repeats.</p>
        <p>We start by introducing the least structured (maximum entropy) distribution of the population responses to stimuli, by treating each time point along the stimulus separately; since every moment of time maps uniquely into one stimulus, we start by building the model of the response given time. We thus find <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e041" xlink:type="simple"/></inline-formula> that reproduces exactly the observed average firing rate for each time bin <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e042" xlink:type="simple"/></inline-formula> in the stimulus and for each neuron <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e043" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e044" xlink:type="simple"/></inline-formula>, as well as the overall covariance matrix <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e045" xlink:type="simple"/></inline-formula> between all pairs of cells (cf. <xref ref-type="bibr" rid="pcbi.1002922-Tkaik6">[62]</xref>). 
Thus, we seek <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e046" xlink:type="simple"/></inline-formula> that maximizes <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e047" xlink:type="simple"/></inline-formula>:<disp-formula id="pcbi.1002922.e048"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002922.e048" xlink:type="simple"/><label>(1)</label></disp-formula>where the subscript to brackets <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e049" xlink:type="simple"/></inline-formula> denotes whether the averaging is done over the maximum entropy distribution (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e050" xlink:type="simple"/></inline-formula>), or over the recorded data; Lagrange multipliers <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e051" xlink:type="simple"/></inline-formula> ensure that the distributions are normalized. This is an optimization problem for parameters <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e052" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e053" xlink:type="simple"/></inline-formula>, which has a unique solution since the entropy is concave. 
The functional form of the solution to this optimization problem is well-known and in our case it can be written as<disp-formula id="pcbi.1002922.e054"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002922.e054" xlink:type="simple"/><label>(2)</label></disp-formula>where the individual time-dependent parameters for each of the cells, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e055" xlink:type="simple"/></inline-formula>, and the stimulus-independent pairwise interaction terms <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e056" xlink:type="simple"/></inline-formula>, are set to match the measured firing rates <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e057" xlink:type="simple"/></inline-formula> and the pairwise correlations <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e058" xlink:type="simple"/></inline-formula>; <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e059" xlink:type="simple"/></inline-formula> is a normalization factor or partition function for each time bin <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e060" xlink:type="simple"/></inline-formula>, given by <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e061" xlink:type="simple"/></inline-formula>.</p>
        <p>The <italic>pairwise time-dependent maximum entropy (pairwise TDME or T2) model</italic> in <xref ref-type="disp-formula" rid="pcbi.1002922.e054">Eq. (2)</xref> is equivalent to an Ising model from physics, where the single-cell parameters are time-dependent local fields acting on each of the neurons (spins), and static (stimulus-independent) infinite-range interaction terms couple each pair of spins. In the limit where interactions go to zero, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e062" xlink:type="simple"/></inline-formula>, the model in <xref ref-type="disp-formula" rid="pcbi.1002922.e054">Eq. (2)</xref> becomes the full conditionally-independent model, itself a <italic>first-order time-dependent maximum entropy model</italic> that reproduces exactly the firing rate of every neuron, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e063" xlink:type="simple"/></inline-formula>:<disp-formula id="pcbi.1002922.e064"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002922.e064" xlink:type="simple"/><label>(3)</label></disp-formula>In this case the probability distribution factorizes, and the solution for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e065" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e066" xlink:type="simple"/></inline-formula> becomes trivially computable from the firing rates, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e067" xlink:type="simple"/></inline-formula>. 
For time bins <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e068" xlink:type="simple"/></inline-formula> that are short enough to contain 0 or 1 spike (as we have assumed throughout), <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e069" xlink:type="simple"/></inline-formula> is given by <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e070" xlink:type="simple"/></inline-formula>. Consistent with our previous notation, we denote this full conditionally-independent model as <bold>T1</bold>.</p>
        <p>Time-dependent maximum entropy models are powerful, since they make no assumption about how the stimulus drives the response; they often serve as useful benchmarks for other models (especially the T1 model). On the other hand, these models require repeated stimulus presentations to fit, involve a number of parameters that grows linearly with the duration of the stimulus, do not generalize to new stimuli, and do not provide an explicit map from the stimuli to the responses.</p>
        <p>We therefore present a more particular form of the model of <xref ref-type="disp-formula" rid="pcbi.1002922.e054">Eq. (2)</xref> that, <bold>(i)</bold>, would give an explicit description of the stimulus-dependent distribution of population patterns; <bold>(ii)</bold>, would generalize to new stimuli; <bold>(iii)</bold>, could be directly compared to the uncoupled LN models; and <bold>(iv)</bold>, would not require repeats of the same stimulus to fit. Specifically, rather than having an arbitrary time-dependent parameter for every neuron for each time bin, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e071" xlink:type="simple"/></inline-formula>, we assume that this dependence takes place through the stimulus projection alone, i.e. <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e072" xlink:type="simple"/></inline-formula>. This is analogous to an LN model, where the neural firing depends on the value of the stimulus projection onto the linear filter <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e073" xlink:type="simple"/></inline-formula>. This choice is made for simplicity; this model can be generalized to, e.g., neurons that depend on two linear projections of the stimulus, by making <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e074" xlink:type="simple"/></inline-formula> depend jointly on <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e075" xlink:type="simple"/></inline-formula>, although such models would be progressively more difficult to infer from data.</p>
        <p>Concretely, we estimated the linear filter <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e076" xlink:type="simple"/></inline-formula> for each cell <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e077" xlink:type="simple"/></inline-formula> using reverse correlation, and convolved the filter with the stimulus sequence, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e078" xlink:type="simple"/></inline-formula>, to get the “generator signal” <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e079" xlink:type="simple"/></inline-formula>. We then looked for the maximum entropy probability distribution <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e080" xlink:type="simple"/></inline-formula>, by requiring that the average firing rate of every cell given the generator signal is the same in the data and under the model, i.e. <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e081" xlink:type="simple"/></inline-formula> (see <xref ref-type="sec" rid="s4">Methods</xref>); as before, we also required the model to reproduce the overall covariance between all pairs of cells, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e082" xlink:type="simple"/></inline-formula>. 
This yields a <italic>pairwise stimulus-dependent maximum entropy (pairwise SDME or S2) model</italic>, which takes the following form:<disp-formula id="pcbi.1002922.e083"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002922.e083" xlink:type="simple"/><label>(4)</label></disp-formula>The parameters of this model are: <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e084" xlink:type="simple"/></inline-formula> couplings <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e085" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e086" xlink:type="simple"/></inline-formula> parameters <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e087" xlink:type="simple"/></inline-formula>, and a linear filter <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e088" xlink:type="simple"/></inline-formula> for each cell; these parameters define the energy function <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e089" xlink:type="simple"/></inline-formula> of the model. We used a Monte Carlo based gradient descent learning procedure to find the model parameters <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e090" xlink:type="simple"/></inline-formula> numerically (see <xref ref-type="sec" rid="s4">Methods</xref>; note that the problem is still convex with a single solution for the parameter values).</p>
        <p>By construction, the S2 model exactly reproduces the covariance of activities, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e091" xlink:type="simple"/></inline-formula>, between all pairs of cells, and also the LN model properties of every cell: an arbitrary nonlinear function <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e092" xlink:type="simple"/></inline-formula> can be encoded by properly choosing how parameters <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e093" xlink:type="simple"/></inline-formula> depend on the linear projections of the stimulus, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e094" xlink:type="simple"/></inline-formula>. We can construct a maximum entropy model with <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e095" xlink:type="simple"/></inline-formula> (no constraints on the pairwise correlations <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e096" xlink:type="simple"/></inline-formula>). The result is a set of uncoupled (conditionally independent) LN models:<disp-formula id="pcbi.1002922.e097"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002922.e097" xlink:type="simple"/><label>(5)</label></disp-formula></p>
        <p><xref ref-type="fig" rid="pcbi-1002922-g003">Fig. 3</xref> shows all the models in a systematic way: the pairwise time-dependent maximum entropy (T2) model of <xref ref-type="disp-formula" rid="pcbi.1002922.e054">Eq. (2)</xref> is an extension of the conditionally independent (T1) model that additionally reproduces the measured pairwise correlations between cells. In a directly analogous way, the pairwise stimulus-dependent maximum entropy (S2) model of <xref ref-type="disp-formula" rid="pcbi.1002922.e083">Eq. (4)</xref> is an extension to the set of uncoupled LN models (S1), <xref ref-type="disp-formula" rid="pcbi.1002922.e097">Eq. (5)</xref>, that additionally reproduces the measured pairwise correlations between cells. Because <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e098" xlink:type="simple"/></inline-formula> (<xref ref-type="disp-formula" rid="pcbi.1002922.e083">Eq. 4</xref>) agrees with <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e099" xlink:type="simple"/></inline-formula> (<xref ref-type="disp-formula" rid="pcbi.1002922.e097">Eq. 5</xref>) exactly in all constrained single-neuron statistics, any improvement in prediction of the S2 model, be it in the firing rate or the codeword distributions, can be directly ascribed to the effect of the interaction terms, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e100" xlink:type="simple"/></inline-formula>.</p>
        <fig id="pcbi-1002922-g003" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g003</object-id>
          <label>Figure 3</label>
          <caption>
            <title>An overview of maximum entropy encoding models.</title>
            <p>The explicit dependence of single-neuron terms (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e101" xlink:type="simple"/></inline-formula>, vertical axis, ‘T’ or ‘S’), and the absence or presence of pairwise terms (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e102" xlink:type="simple"/></inline-formula>, horizontal axis, ‘1’ or ‘2’), together define the type of the maximum entropy model (e.g. pairwise SDME is ‘S2’). For completeness, the first row of the table includes static maximum entropy models of population vocabulary, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e103" xlink:type="simple"/></inline-formula>, which have no explicit stimulus dependence. The full conditionally independent model (T1) reproduces exactly the instantaneous firing rate of every neuron, and thus fully captures the stimulus sensitivity, history effects, and adaptation on a single neuron level; for experimentally recorded rasters with stimulus repeats, simulated T1 rasters are often generated by taking the original data and, at each time point and for every neuron, randomly permuting the responses recorded on different stimulus repeats. “Total correlation” is the pairwise correlation matrix of activities, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e104" xlink:type="simple"/></inline-formula>, averaged over all repetitions and all times in the experiment.</p>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g003" position="float" xlink:type="simple"/>
        </fig>
        <p>An alternative approach to describing the joint response of large populations of neurons to external stimuli has been presented in Ref. <xref ref-type="bibr" rid="pcbi.1002922-Pillow1">[41]</xref>. The Generalized Linear Model (GLM) gives a generative model from which one can sample simulated responses to new stimuli, relying on activity history and temporal dependencies between cells, but assuming conditional independence within any given time bin. We compare the advantages of the two approaches in the <xref ref-type="sec" rid="s3">Discussion</xref> below, but briefly emphasize here that a key difference is that GLM does not present an explicit probability distribution over codewords (that are defined for temporal bins significantly longer than those of the GLMs), which is central for the analysis of the neural code we present below.</p>
      </sec>
      <sec id="s2c">
        <title>Pairwise SDME (S2) model outperforms conditionally independent models in describing single cell responses and joint patterns of activity</title>
        <p>To assess the accuracy of different stimulus-dependent models, and, in particular, the contribution of the interactions between cells, we fitted and quantified the performance of the uncoupled LN models (S1) and the pairwise SDME model (S2). At the level of single neurons, we found that the S2 model predicted the firing rates better than the S1 model (see e.g. <xref ref-type="fig" rid="pcbi-1002922-g002">Fig. 2a</xref>), with the normalized correlation coefficient between the true and predicted firing rate, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e105" xlink:type="simple"/></inline-formula>, reaching <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e106" xlink:type="simple"/></inline-formula> (mean <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e107" xlink:type="simple"/></inline-formula> std across 100 cells), as shown in <xref ref-type="fig" rid="pcbi-1002922-g002">Fig. 2b</xref>.</p>
        <p>The differences between the S2 and the S1 models become more striking at the level of the activity patterns of the whole population. <xref ref-type="fig" rid="pcbi-1002922-g004">Figs. 4a,b</xref> show the complex structure of the population activity patterns across all 626 repeats at a particular moment in time. During times when the population is active, it generates a wide diversity of patterns in response to the same stimulus; even with hundreds of repeats, these distributions cannot be empirically sampled. Nevertheless, the large number of repeats suffices to identify and estimate reliable low-order marginals of these distributions, in particular, the correlations between the pairs of neurons at various points in time. The wide range of magnitudes of these reliably estimated correlations shows that a number of neuronal pairs are far from conditionally independent. As shown in <xref ref-type="fig" rid="pcbi-1002922-g004">Fig. 4c</xref>, the S2 model captures a significant fraction of this correlation structure on a timebin-by-timebin basis (on test data); clearly, the S1 model fails at this task.</p>
        <fig id="pcbi-1002922-g004" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g004</object-id>
          <label>Figure 4</label>
          <caption>
            <title>Pairwise SDME (S2) model predicts population activity patterns for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e108" xlink:type="simple"/></inline-formula> neurons better than conditionally independent LN (S1) models.</title>
            <p>(<bold>a</bold>) The activity raster for 100 neurons across 626 repeats of the stimulus at a point in time where the retina is moderately active (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e109" xlink:type="simple"/></inline-formula>). Dots represent individual spikes; training repeats denoted in black, test repeats in orange. (<bold>b</bold>) The diversity in retinal responses in a. Shown are all distinct patterns; their number is comparable to the number of repeats. Neurons are resorted by their instantaneous firing rate (high rate = top, low rate = bottom). (<bold>c</bold>) S2 model fit on the training repeats predicts the reliably estimated correlation coefficients between pairs of neurons at various time points where the retina is active. We identify all correlation coefficients whose value can be estimated from data with less than 25% relative error across many splits of the repeats into two halves. The value of these correlation coefficients is estimated on the test set (horizontal axis) and compared to the model prediction (vertical axis). (<bold>d</bold>) The log-likelihood ratio of the population firing patterns under the S2 model and under the S1 model, shown as a function of time (violet dots, scale at left) for an example (test) stimulus repeat. For reference, the average population firing rate is shown in grey (scale at right). The arrow denotes the time bin displayed in a, b.</p>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g004" position="float" xlink:type="simple"/>
        </fig>
        <p>We found that S2 is orders of magnitude better in predicting the population neural responses to stimuli. This is quantified in <xref ref-type="fig" rid="pcbi-1002922-g004">Fig. 4d</xref>, which compares S1 and S2 through the log-likelihood ratio, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e110" xlink:type="simple"/></inline-formula>, for the population activity patterns <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e111" xlink:type="simple"/></inline-formula> under the two models. These differences are large in particular for those stimuli that elicit a strong response, that is, precisely where the response consists of synchronous spiking and the structure of the codewords can be nontrivial. <xref ref-type="fig" rid="pcbi-1002922-g005">Fig. 5</xref> summarizes these results by showing the average log-likelihood ratio over all testing repeats, emphasizing that the difference between the models becomes particularly apparent for groups of more than 20 cells.</p>
        <fig id="pcbi-1002922-g005" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g005</object-id>
          <label>Figure 5</label>
          <caption>
            <title>The performance of the SDME (S2) model relative to conditionally independent LN (S1) models.</title>
            <p>The average log likelihood ratio between the S2 and the S1 models evaluated on the test set, as a function of the population size, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e112" xlink:type="simple"/></inline-formula> (error bars = std over 10 randomly chosen groups of neurons at that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e113" xlink:type="simple"/></inline-formula>).</p>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g005" position="float" xlink:type="simple"/>
        </fig>
        <p>We next examined how well various models of the neural codebook, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e114" xlink:type="simple"/></inline-formula>, explain the total vocabulary, that is, the distribution of neural codewords observed across the whole duration of the experiment, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e115" xlink:type="simple"/></inline-formula>. Despite the nominally large space of possible codewords—much larger than the total number of samples in the experiment (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e116" xlink:type="simple"/></inline-formula>)—the sparsity of spikes and the correlations between neurons restrict the vocabulary to a much smaller set of patterns. Some of these occur many times during our stimulus presentation, allowing us to estimate their empirical probability, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e117" xlink:type="simple"/></inline-formula>, directly from the experiment, and compare it to the model prediction <xref ref-type="bibr" rid="pcbi.1002922-Ganmor2">[35]</xref>. The most prominent example of such frequently observed codewords is the silent pattern, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e118" xlink:type="simple"/></inline-formula>, which is seen <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e119" xlink:type="simple"/></inline-formula> of the time. <xref ref-type="fig" rid="pcbi-1002922-g006">Fig. 6</xref> shows the likelihood ratio of the model probability and empirical probability for various codewords observed in the test part of the experiment, as a function of the rate at which these codewords appear. 
Here we used an additional model for comparison, i.e., the full conditionally-independent model (T1), where every cell is described in terms of time-dependent firing rate. The S2 model in <xref ref-type="fig" rid="pcbi-1002922-g006">Fig. 6a</xref> strongly outperforms the S1 model in <xref ref-type="fig" rid="pcbi-1002922-g006">Fig. 6b</xref>, and has a slightly better performance than the T1 model (<xref ref-type="fig" rid="pcbi-1002922-g006">Fig. 6c</xref>), despite the fact that the latter is determined by <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e120" xlink:type="simple"/></inline-formula> parameters, the firing rates of every cell in every time bin. Quantitatively, the per-codeword log-likelihood of the test data under the S1 model is 5.30, under the T1 model 4.34, under the S2 model 4.12, under the empirically sampled distribution on the training set 4.02, while the lower bound on the log-likelihood (obtained when the “model” is the set of true empirical frequencies on the test set) is 2.98 (see <xref ref-type="sec" rid="s4">Methods</xref>).</p>
        <fig id="pcbi-1002922-g006" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g006</object-id>
          <label>Figure 6</label>
          <caption>
            <title>The performance of various models in accounting for the total vocabulary of the population, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e121" xlink:type="simple"/></inline-formula>.</title>
            <p>The results for the S2 model are shown in (<bold>a</bold>), the results for the S1 model in (<bold>b</bold>), and the results for a full conditionally independent model (T1) in (<bold>c</bold>). The first row displays the log ratio of model to empirical probabilities for various codewords (dots), as a function of that codeword's empirical frequency in the recorded data. The model probabilities were estimated by generating Monte Carlo samples drawn from the corresponding model distributions; only patterns that were generated in the MC run as well as found in the recorded data are shown. GoF quantifies the deviation between true and predicted <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e122" xlink:type="simple"/></inline-formula> of the non-silent codewords shown in the plot; smaller values indicate better agreement (see <xref ref-type="sec" rid="s4">Methods</xref>). The second row summarizes this scatterplot by binning codewords according to their frequency, and showing the average log probability ratio in the bin (solid line), as well as the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e123" xlink:type="simple"/></inline-formula> std scatter across the codewords in the bin (shaded area). The highly probable all-silent state, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e124" xlink:type="simple"/></inline-formula>, is shown separately as a circle. The third row shows the overlap between 500 most frequent patterns in the data and 500 most likely patterns generated by the model (see text). Models were fit on training repeats; comparisons are done only with test repeats data.</p>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g006" position="float" xlink:type="simple"/>
        </fig>
        <p>On average, S2 predicts the probabilities of the patterns of activity with minimal bias, and with a standard deviation of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e125" xlink:type="simple"/></inline-formula> of about 1; the S1 model in comparison is biased and has a spread that is more than twice as large. Even more striking is the fact that S1 assigns very low probabilities to some codewords such that they were never generated during our Monte Carlo sampling (and are therefore not even shown in scatterplots of <xref ref-type="fig" rid="pcbi-1002922-g006">Fig. 6</xref>), although they were frequently observed in the experiment. This discrepancy is quantified by enumerating the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e126" xlink:type="simple"/></inline-formula> most probable patterns in the data and in the model (by sampling, see <xref ref-type="sec" rid="s4">Methods</xref>), and measuring the size of the intersection of the two sets of patterns. In other words, we ask if the model is even able to access all the patterns that one is likely to record in the experiment. As shown in the bottom of <xref ref-type="fig" rid="pcbi-1002922-g006">Fig. 6</xref>, S2 does well on this task, with 419 codewords in the intersection of the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e127" xlink:type="simple"/></inline-formula> most likely patterns in the data and the model. This is a much better performance than the S1 model, and a little better than for the T1 model (which has many more parameters). We emphasize that all these comparisons were done on test data only, so that the models had to generalize over the large diversity of patterns where some of the patterns seen in the training set might never occur on the testing set and vice versa (see <xref ref-type="fig" rid="pcbi-1002922-g004">Fig. 4a,b</xref>).</p>
        <p>The S2 model was constructed to capture exactly the total pairwise correlation in neuronal spiking, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e128" xlink:type="simple"/></inline-formula>. With repeated stimulus, this total correlation can be broken down into the signal and noise components. The signal correlations, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e129" xlink:type="simple"/></inline-formula>, are inferred by applying the same formula as for the total correlation, but on the spiking raster where the repeated trial indices have been randomly and independently permuted for each time bin. This removes any correlation due to interactions between spikes on simultaneously recorded trials, and only leaves the correlations induced by the response being locked to the stimulus. The noise correlation, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e130" xlink:type="simple"/></inline-formula>, is then defined as the difference between the total and the signal components, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e131" xlink:type="simple"/></inline-formula>. We calculated the noise correlations between all pairs in our <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e132" xlink:type="simple"/></inline-formula> neuron dataset. By their definition, the conditionally independent models cannot reproduce <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e133" xlink:type="simple"/></inline-formula>, which are always zero for those models. To assess the performance of the S2 model, we drew samples from our model distribution using a Monte Carlo simulation and compared the noise correlations in the simulated rasters to the true noise correlations. 
The model prediction is tightly correlated with the measured values, as shown in <xref ref-type="fig" rid="pcbi-1002922-g007">Fig. 7</xref>. We observe a systematic deviation of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e134" xlink:type="simple"/></inline-formula>, most likely because the assumed dependence on the stimulus through one linear filter per neuron is insufficient to capture the complete dependence on stimulus, thereby underestimating the full structure of stimulus correlation and inducing an excess in the noise correlation. Despite this, the degree of correspondence in noise correlations observed in <xref ref-type="fig" rid="pcbi-1002922-g007">Fig. 7</xref> is telling us that the S2 model has clearly captured a large amount of noise covariance structure in neural firing at the network level.</p>
        <fig id="pcbi-1002922-g007" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g007</object-id>
          <label>Figure 7</label>
          <caption>
            <title>Measured vs. predicted noise correlations for the pairwise SDME (S2) model.</title>
            <p>Noise correlation (see text) is estimated from recorded data for every pair of neurons, and plotted against the noise correlation predicted by the S2 model (each pair of neurons = one dot; shown are <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e135" xlink:type="simple"/></inline-formula> dots for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e136" xlink:type="simple"/></inline-formula> neurons; for significantly correlated pairs, the slope of the best fit line is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e137" xlink:type="simple"/></inline-formula>, with <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e138" xlink:type="simple"/></inline-formula>). Conditionally independent models predict zero noise correlation for all pairs.</p>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g007" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec id="s2d">
        <title>Interpretation of the functional interactions between cells in the pairwise SDME (S2) model</title>
        <p>How should we interpret the inferred parameters of the S2 model? LN models have a clear mechanistic interpretation in terms of the cell's receptive field and the nonlinear spiking mechanism. Here, similarly, the stimulus dependent part of the model for each cell, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e139" xlink:type="simple"/></inline-formula>, is a nonlinear function of a filtered version of the stimulus <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e140" xlink:type="simple"/></inline-formula>; in the absence of neuron-to-neuron couplings, the nonlinearity of every neuron would correspond to <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e141" xlink:type="simple"/></inline-formula>, where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e142" xlink:type="simple"/></inline-formula>, according to <xref ref-type="disp-formula" rid="pcbi.1002922.e097">Eq. (5)</xref>. The dependence of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e143" xlink:type="simple"/></inline-formula> on the stimulus projection <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e144" xlink:type="simple"/></inline-formula> is similar across the recorded cells as shown in <xref ref-type="fig" rid="pcbi-1002922-g008">Fig. 8a</xref>; as expected, higher overlaps with the linear filter induce higher probability of spiking.</p>
        <fig id="pcbi-1002922-g008" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g008</object-id>
          <label>Figure 8</label>
          <caption>
            <title>Pairwise SDME (S2) model parameters.</title>
            <p>(<bold>a</bold>) Average values of the LN-like driving term, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e145" xlink:type="simple"/></inline-formula>, where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e146" xlink:type="simple"/></inline-formula>, across all cells <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e147" xlink:type="simple"/></inline-formula> (error bars = std across cells), for each of the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e148" xlink:type="simple"/></inline-formula> adaptive bins for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e149" xlink:type="simple"/></inline-formula> (see <xref ref-type="sec" rid="s4">Methods</xref>). (<bold>b</bold>) Pairwise interaction map <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e150" xlink:type="simple"/></inline-formula> of the S2 model, between all <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e151" xlink:type="simple"/></inline-formula> neurons in the experiment. (<bold>c</bold>) Histogram of pairwise interaction values from (b), and their average value as a function of the distance between cells (inset). (<bold>d</bold>) For each pair of cells <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e152" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e153" xlink:type="simple"/></inline-formula>, we plot the value of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e154" xlink:type="simple"/></inline-formula> under the static maximum entropy model of <xref ref-type="disp-formula" rid="pcbi.1002922.e157">Eq. (6)</xref> vs. 
the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e155" xlink:type="simple"/></inline-formula> from the S2 model of <xref ref-type="disp-formula" rid="pcbi.1002922.e083">Eq. (4)</xref>.</p>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g008" position="float" xlink:type="simple"/>
        </fig>
        <p>The pairwise interaction terms in the S2 model, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e156" xlink:type="simple"/></inline-formula>, are symmetric, static, and stimulus independent by construction. As such, they represent only functional and not physical (i.e. synaptic) connections between the cells. <xref ref-type="fig" rid="pcbi-1002922-g008">Fig. 8b</xref> shows the pairwise interaction map for 100 cells; the histogram of their values (in <xref ref-type="fig" rid="pcbi-1002922-g008">Fig. 8c</xref>) reflects that they can be of both signs, but the distribution has a stronger positive tail, i.e. a number of cell pairs tend to spike together or be silent together with a probability that is higher than expected from their respective LN models. We can compare these interactions to the interactions of a static (non-stimulus-dependent) pairwise maximum entropy model for the population vocabulary <xref ref-type="bibr" rid="pcbi.1002922-Schneidman2">[18]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Shlens1">[28]</xref>:<disp-formula id="pcbi.1002922.e157"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002922.e157" xlink:type="simple"/><label>(6)</label></disp-formula>In this model for the total distribution of codewords, there is no stimulus dependence, and the parameters <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e158" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e159" xlink:type="simple"/></inline-formula> are chosen so that the distribution is as random as possible, while reproducing exactly the measured mean firing rate of every neuron <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e160" xlink:type="simple"/></inline-formula>, and every pairwise correlation, <inline-formula><inline-graphic 
xlink:href="info:doi/10.1371/journal.pcbi.1002922.e161" xlink:type="simple"/></inline-formula>, across the whole duration of the experiment.</p>
        <p>Interestingly, we find that the pairwise interaction terms in the S2 model of <xref ref-type="disp-formula" rid="pcbi.1002922.e083">Eq. (4)</xref> are closely related to the interactions in the static pairwise maximum entropy model of <xref ref-type="disp-formula" rid="pcbi.1002922.e157">Eq. (6)</xref>: S2 interactions, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e162" xlink:type="simple"/></inline-formula>, tend to be smaller in magnitude, but have the same sign and relative ordering as the static ME interactions, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e163" xlink:type="simple"/></inline-formula>. Some degree of correspondence is expected: an interaction between neurons <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e164" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e165" xlink:type="simple"/></inline-formula> in the static ME model captures the combined effect of the stimulus and noise correlations, while in the corresponding S2 interaction, (most of) the stimulus correlation has been factored out into the correlated dynamics of the inputs to the neurons <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e166" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e167" xlink:type="simple"/></inline-formula>, i.e. <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e168" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e169" xlink:type="simple"/></inline-formula>. 
The surprisingly high degree of correspondence, however, indicates that even the interactions learned from static maximum entropy models can account for, up to a scaling factor, the pairwise neuron dependencies that are <italic>not</italic> due to the correlated stimulus inputs.</p>
      </sec>
      <sec id="s2e">
        <title>Pairwise SDME (S2) model partitions the space of activity patterns into clusters that generalize to testing data</title>
        <p><xref ref-type="fig" rid="pcbi-1002922-g004">Figs. 4a,b</xref> show the richness of activity patterns produced in response to repeats of the same stimulus. While these patterns must encode the same information, it is not clear how this could be established by looking at the patterns alone (without prior knowledge that they were generated in response to the same stimulus), because of the high dimensionality of the pattern space. Is there a way to simplify this response space? We suggest one such approach here, motivated by the analogy to Ising models in statistical physics and the related similarities with the Hopfield networks <xref ref-type="bibr" rid="pcbi.1002922-Tkaik1">[27]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Tkaik2">[32]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Tkaik6">[62]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Hopfield1">[63]</xref>.</p>
        <p>At every instant in time, the probability of any activity pattern <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e170" xlink:type="simple"/></inline-formula> in the S2 model is fully specified by the distribution with an exponential form given by <xref ref-type="disp-formula" rid="pcbi.1002922.e083">Eq. (4)</xref>. In analogy to statistical physics, the exponent is the (negative) energy of the state <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e171" xlink:type="simple"/></inline-formula>. This energy function defines an instantaneous “energy landscape” over the space of all possible activity patterns. Minima in this landscape can be viewed as metastable patterns or attractors, and all activity patterns can be assigned to their respective attractors by descending on the energy landscape until the closest local minimum is reached, much like in the Hopfield network. In this way, the space of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e172" xlink:type="simple"/></inline-formula> patterns is partitioned, at each point in time, into a number of domains centered on the metastable states. How useful is this representation of the response space? Using the S2 model fit on training repeats, we examined neural responses in every time bin across all testing repeats. We assigned each response pattern from testing data to its corresponding metastable state. <xref ref-type="fig" rid="pcbi-1002922-g009">Fig. 9a</xref> shows, as a function of time, all identified metastable states, their energies (i.e. the negative log probability of that state), and the number of repeats on which a pattern belonging to that state was emitted. This analysis still paints a rich, but already much simplified picture of the retinal responses, where many patterns are grouped into a small number of clusters centered on the metastable states. 
Interestingly, these assignments generalize very well: in <xref ref-type="fig" rid="pcbi-1002922-g009">Fig. 9b</xref> we independently identify the metastable states on testing and training sets for each time bin, assign all patterns seen in the experiment to these states, and count and compare how many times each state appears on testing and training repeats. Virtually all (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e173" xlink:type="simple"/></inline-formula>) metastable states appearing in training repeats are found on testing repeats and vice versa, and this intersection is vastly larger than the intersection of the activity patterns themselves, a lot of which can appear only once in all 626 repeats. Moreover, the frequency with which patterns belonging to a particular metastable state occur is reproducible between the training and test data, suggesting that the partitioning of the high-dimensional activity space into clusters defined by the energy function of the S2 model is a productive dimensionality reduction method in this context.</p>
        <fig id="pcbi-1002922-g009" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g009</object-id>
          <label>Figure 9</label>
          <caption>
            <title>Clustering of response patterns into basins of attraction centered on metastable patterns generalizes across repeats.</title>
            <p>(<bold>a</bold>) Every response pattern <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e174" xlink:type="simple"/></inline-formula> from data is assigned to its corresponding metastable pattern <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e175" xlink:type="simple"/></inline-formula> by descending on the energy landscape <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e176" xlink:type="simple"/></inline-formula> defined by the S2 model of <xref ref-type="disp-formula" rid="pcbi.1002922.e083">Eq. (4)</xref> until the local minimum is reached (see text). Across all test repeats and at each point in time (horizontal axis), we find the metastable states that are visited more than 30 times, plot their energy <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e177" xlink:type="simple"/></inline-formula> (vertical axis), and the number of repeats on which that metastable state is visited (shade of red). (<bold>b</bold>) Inset: for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e178" xlink:type="simple"/></inline-formula> (blue rectangle in a), we plot the frequency of visits to each metastable state (dots) in the training set (horizontal) against the frequency in the test set (vertical). Main panel: the same analysis across all time bins (different colors) superposed; the dashed line marks equality.</p>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g009" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec id="s2f">
        <title>Pairwise SDME (S2) model reveals the strongly correlated nature of information encoding by large neural populations</title>
        <p>The S2 model is an approximation to the neural codebook, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e179" xlink:type="simple"/></inline-formula>, while the static ME model describes the population vocabulary, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e180" xlink:type="simple"/></inline-formula>. With these two distributions in hand, we can explore how the population jointly encodes the information about the stimulus into neural codewords—the joint activity patterns of spiking and silence. We make use of the fact that we can estimate the entropy of the maximum entropy distributions using a procedure of heat capacity integration, as explained in Refs. <xref ref-type="bibr" rid="pcbi.1002922-Tkaik1">[27]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Tkaik2">[32]</xref> (see <xref ref-type="sec" rid="s4">Methods</xref>). The information (in bits) that the codewords carry about the stimulus is then<disp-formula id="pcbi.1002922.e181"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002922.e181" xlink:type="simple"/><label>(7)</label></disp-formula>that is, the information can be written as a difference of the entropy of the neural vocabulary, and the noise entropy (the average of the entropy of the codebook), where the entropy is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e182" xlink:type="simple"/></inline-formula>. Because of the maximum entropy property of our model for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e183" xlink:type="simple"/></inline-formula>, the entropy of our static pairwise model in <xref ref-type="disp-formula" rid="pcbi.1002922.e157">Eq. 
(6)</xref> is an upper bound on the transmitted information; expressed as an entropy rate, this amounts to <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e184" xlink:type="simple"/></inline-formula>.</p>
        <p>The brain does not have direct access to the stimulus, but only receives codewords <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e185" xlink:type="simple"/></inline-formula>, drawn from <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e186" xlink:type="simple"/></inline-formula>, by the retina. It is therefore useful to estimate for every moment in time, the <italic>surprise</italic> about the output of the retina, and thus about the stimulus, which is given by <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e187" xlink:type="simple"/></inline-formula>. We, as experimenters—but not the brain—have access to stimulus repeats and thus to <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e188" xlink:type="simple"/></inline-formula>, so we can compute the average value of surprise (per unit time) at every instant <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e189" xlink:type="simple"/></inline-formula> in the stimulus:<disp-formula id="pcbi.1002922.e190"><graphic position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1002922.e190" xlink:type="simple"/><label>(8)</label></disp-formula>This quantity can be expressed using the entropies and the learned parameters of our maximum entropy models, and is plotted as a function of time in <xref ref-type="fig" rid="pcbi-1002922-g010">Fig. 10</xref>. Since averaging across time is equal to averaging over the stimulus ensemble, we see from <xref ref-type="disp-formula" rid="pcbi.1002922.e190">Eq. 
(8)</xref> that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e191" xlink:type="simple"/></inline-formula> would have to be identically equal to <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e192" xlink:type="simple"/></inline-formula> under the condition that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e193" xlink:type="simple"/></inline-formula> (marginalization). Since we build models for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e194" xlink:type="simple"/></inline-formula> (static ME) and <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e195" xlink:type="simple"/></inline-formula> (S2) from data independently, they need not obey the marginalization condition exactly, but they will do so if they provide a good account of the data. Indeed, by using the static ME and S2 distributions in <xref ref-type="disp-formula" rid="pcbi.1002922.e190">Eq. (8)</xref> for surprise, we find that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e196" xlink:type="simple"/></inline-formula>, very close to the entropy rate <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e197" xlink:type="simple"/></inline-formula> of the total vocabulary and within the estimated error bars of the entropy, which are <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e198" xlink:type="simple"/></inline-formula>1%.</p>
        <fig id="pcbi-1002922-g010" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002922.g010</object-id>
          <label>Figure 10</label>
          <caption>
            <title>Surprise and information transmission estimated from the pairwise SDME (S2) model.</title>
            <p>(<bold>a</bold>) Surprise rate (blue) is estimated from the static ME and S2 models assuming independence of codewords across time bins. The instantaneous information rate (red) is the difference between the surprise and the noise entropy rate, estimated from the S2 model (see text). The information transmission rate is the average of the instantaneous information across time. (<bold>b</bold>) Population firing rate as a function of time shows that bursts of spiking strongly correlate with the bursts of surprise and information transmission in the population. (<bold>c</bold>) The stimulus (normalized to zero mean and unit variance) is shown for reference as a function of time.</p>
          </caption>
          <graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002922.g010" position="float" xlink:type="simple"/>
        </fig>
        <p>To estimate the information transmission, we have to subtract the noise entropy rate from the output entropy rate <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e199" xlink:type="simple"/></inline-formula>, as dictated by <xref ref-type="disp-formula" rid="pcbi.1002922.e181">Eq. (7)</xref>. The entropy of the S2 model is an upper bound on the noise entropy; since this is not a lower bound, we cannot put a strict bound on the information transmission, but can nevertheless estimate it. <xref ref-type="fig" rid="pcbi-1002922-g010">Fig. 10</xref> shows the “instantaneous information” <xref ref-type="bibr" rid="pcbi.1002922-DeWeese1">[64]</xref>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e200" xlink:type="simple"/></inline-formula>, as a function of time; from <xref ref-type="disp-formula" rid="pcbi.1002922.e181">Eq. (7)</xref>, the mutual information rate is a time average of this quantity, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e201" xlink:type="simple"/></inline-formula>. We find <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e202" xlink:type="simple"/></inline-formula>. This quantity can be compared to the total entropy rate of the stimulus itself (which must be higher than <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e203" xlink:type="simple"/></inline-formula>), which in our case is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e204" xlink:type="simple"/></inline-formula> (see <xref ref-type="sec" rid="s4">Methods</xref>). 
While our estimates seem to indicate that a lot of vocabulary bandwidth (730 bit/s) is “lost” to noise (600 bit/s), the last comparison shows that the Gaussian FFF stimulus source itself is not very rich, so that the estimated information transmission takes up more than half of the actual entropy rate of the source.</p>
        <p>Lastly, we asked how important it is to include pairwise interactions, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e205" xlink:type="simple"/></inline-formula>, in the S2 model, compared to the S1 model, when accounting for information transmission. We therefore estimated the noise entropy rate for the S1 model, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e206" xlink:type="simple"/></inline-formula>, which was found to be <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e207" xlink:type="simple"/></inline-formula>, considerably higher than the noise entropy of the S2 model. Crucially, this noise entropy rate is larger than the total entropy rate <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e208" xlink:type="simple"/></inline-formula> estimated above, which is impossible for consistent models of the neural codebook and the vocabulary (since it would lead to negative information rates). This failure is a quantitative demonstration of the inability of the uncoupled LN models to reproduce the statistics of the population vocabulary, as shown in <xref ref-type="fig" rid="pcbi-1002922-g006">Fig. 6b</xref>, despite a seemingly small performance difference on the level of single cell PSTH prediction.</p>
      </sec>
    </sec>
    <sec id="s3">
      <title>Discussion</title>
      <p>We presented a modeling framework for stimulus encoding by large populations of neurons, which combines an individual neuronal receptive field model, with the ability to include pairwise interactions between neurons. The result is a stimulus-dependent pairwise maximum entropy (S2) model, which is the most parsimonious model of the population response to the stimulus that reproduces the linear-nonlinear (LN) aspect of single cells, as well as the pairwise correlation structure between neurons. In two limiting cases, the S2 model reduces to known models: if the single cell parameters <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e209" xlink:type="simple"/></inline-formula> are static, S2 becomes the static pairwise maximum entropy model of the population vocabulary; if the couplings <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e210" xlink:type="simple"/></inline-formula> are 0, S2 reduces to S1, the set of uncoupled LN models.</p>
      <p>We applied this modeling framework to the salamander retina presented with Gaussian white noise stimuli, and found that the interactions between neurons play an important role in determining the detailed patterns of population response. In particular, the S2 model gave better prediction of PSTH of single cells, yielded orders-of-magnitude improvement in describing the population patterns, and captured significant aspects of noise correlations. The deviations between the S2 and the S1 model became significant for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e211" xlink:type="simple"/></inline-formula> cells, and tended to occur at “interesting” times in the stimulus, precisely when the neural population was not silent.</p>
      <p>The S2 model allowed us to improve over LN models for salamander retinal ganglion cells in terms of the PSTH prediction of single cells. But, more importantly, it gave a huge improvement in terms of describing and predicting the population activity patterns, or codewords. Interestingly, for parasol cells in the macaque retina under flickering checkerboard stimulation, the generalized linear model did not yield firing rate improvement relative to uncoupled LN models (but did improve the prediction of higher order statistics of neural activity) <xref ref-type="bibr" rid="pcbi.1002922-Pillow1">[41]</xref>. In both cases, however, the improvements reflect the role of dependencies among cells in encoding the stimulus, and their effect becomes apparent when we ask questions about information transmission by a neural population. Maximum entropy models can only put upper bounds on the total entropy and the noise entropy of the neural code (and this statement remains true even if successive codewords are not independent), and as such cannot set a strict bound, but only give an estimate, for the information transmission. Nevertheless, ignoring the inter-neuron dependencies by using the S1 model would predict the total population responses so badly that the estimated noise entropy would be higher than the upper bound on the total entropy, which is a clear impossibility. In contrast, the S2 model gives noise entropy rates that are consistent with the estimate from the static maximum entropy model, and transmission rates that amount to about 60% of the source entropy rate (comparable to estimates of coding efficiency in single neurons, e.g., Ref. <xref ref-type="bibr" rid="pcbi.1002922-Strong1">[65]</xref>).</p>
      <p>An alternative approach to describing the joint response of large populations of neurons to external stimuli has been presented in Ref <xref ref-type="bibr" rid="pcbi.1002922-Pillow1">[41]</xref>. The Generalized Linear Model (GLM) gives a generative model from which one can sample simulated responses to new stimuli, relying on activity history and temporal dependencies between cells. The crucial assumption of the GLM is that the responses of the neurons are conditionally independent given the stimulus and the spiking history; to satisfy this assumption, the discretization of time has to be as fine grained as possible, but certainly well below the discretization of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e212" xlink:type="simple"/></inline-formula> or <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e213" xlink:type="simple"/></inline-formula> typically used for maximum entropy models in our retinal preparation. This conditional independence, guaranteed by very short time bins, allows tractable inference procedures to be devised for fitting the GLMs from data. On the other hand, it makes—by its very definition—successive activity patterns dependent on each other, because that is the only way to introduce interactions between the spikes. In contrast, maximum entropy models pick the time bin to be short enough such that multiple spikes are rarely observed in the same time bin, but long enough so that most of the strong spike-spike interactions (as well as fine temporal detail, such as spike-timing jitter) occur <italic>within a single bin</italic>. 
This allows us to view activity patterns in successive time bins as codewords (although some statistical dependence between them remains: in the SDME models this is probably due to multiple timescales on which the neurons respond to stimuli; and in the static ME model <xref ref-type="bibr" rid="pcbi.1002922-Marre1">[31]</xref> due to, in part, stimulus correlation). If we were to make the time scale in maximum entropy models much shorter, e.g. by an order of magnitude or more, we could make the conditional independence assumption of the responses given the stimuli <italic>and</italic> previous spiking. This would lead us to GLM-like models in the maximum entropy framework, e.g., to dynamic/nonequilibrium generalizations of Ising models <xref ref-type="bibr" rid="pcbi.1002922-Roudi4">[48]</xref>; in this case, however, we would again lose the interpretation where the instantaneous state of the retina is represented well by a single codeword. For this reason, GLM and SDME are complementary approaches: the first allows for a temporally-detailed probabilistic description of a spiking process, while the second gives an explicit expression for the probability distribution over codewords in longer temporal bins. To our knowledge, there is no easy way to derive one model from the other: while one can fit the GLM with very small time bins, use it to <italic>generate</italic> rasters and re-discretize those into time bins of longer duration to get a codeword representation, building a probabilistic model for the codewords from the GLM-derived rasters is as difficult as building it for original data. 
While a more detailed comparison of these models is beyond the scope of the current work, it is interesting to note that these approaches are different and complementary also in terms of the potential interpretation of their parameters: GLM couplings between neurons have an intuitive interpretation in terms of causal dependency between cells, whereas the SDME ones suggest a prior on the coding vocabulary of the population (see below). Finally, from a modeling viewpoint, GLM lends itself to a clean and tractable maximum likelihood inference framework with regularization, whereas the SDME offers the tools and insights of statistical physics <xref ref-type="bibr" rid="pcbi.1002922-Tkaik1">[27]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Schneidman3">[42]</xref>–<xref ref-type="bibr" rid="pcbi.1002922-Sessak1">[53]</xref> (including, e.g., advanced Monte Carlo schemes for entropy estimation <xref ref-type="bibr" rid="pcbi.1002922-Lee1">[66]</xref> and the partitioning of the space of codewords in terms of metastable states briefly discussed in this paper).</p>
      <p>Tkačik and colleagues <xref ref-type="bibr" rid="pcbi.1002922-Tkaik6">[62]</xref> have suggested that one can interpret <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e214" xlink:type="simple"/></inline-formula> in an SDME model as a prior over the activity patterns that the population would use to optimally encode the stimulus. For low noise level they argued that the prior should be “weak” (and could help decorrelate the responses) because the population could faithfully encode the stimulus, whereas in the noisy regime, the prior should match the statistics of the sensory world and thus counteract the effects of noise. Berkes and colleagues <xref ref-type="bibr" rid="pcbi.1002922-Berkes1">[67]</xref> suggested a similar reason for the relationship between ongoing and induced activity patterns in the visual cortex. Our results show that interactions are necessary for capturing the network encoding, and implicitly reflect the existence of such a prior. The recovered interactions are strongly correlated with the interaction parameters of a static, stimulus independent model over the distribution of patterns, making it possible for the brain (which only has access to the spikes, not the stimulus) to learn these values. Whether the interactions are matched to the statistics of the visual inputs as suggested in Ref <xref ref-type="bibr" rid="pcbi.1002922-Tkaik6">[62]</xref> will be the focus of future work.</p>
      <p>The maximum entropy models presented here can be immediately applied to other brain areas where one can get stable recordings of many neurons over a few tens of minutes <xref ref-type="bibr" rid="pcbi.1002922-Ganmor2">[35]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Ganmor3">[68]</xref>. SDME could be applied to spatially structured stimuli, for instance, to capture the response to the flickering checkerboards: obtaining good estimates of the spatio-temporal receptive fields is standard procedure, identical to that in LN or GLM-type models, while fitting the parameters <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e215" xlink:type="simple"/></inline-formula> of the SDME is equally tractable on full field flicker (as presented here) or a movie with spatial structure. In practice, a different tradeoff would be chosen in experimental design, by making the stimulus segment longer to sample the linear filters better from many different stimuli, and decreasing the number of repeats. As we noted above, for fitting the model, one could also eliminate repeated structure altogether, yet repeated presentations of the same stimuli would still be needed to assess the model quality in terms of the PSTH. The current design of the experiment focused on a very large number of repeats of the same stimulus, to allow for as accurate an estimate as possible of the PSTH and correlations of individual cells, while future experiments could allow for evaluation of the model on novel repeated stimuli. Given the results we have presented here and those of <xref ref-type="bibr" rid="pcbi.1002922-Pillow1">[41]</xref>, we expect that the SDME models would significantly outperform the LN models on novel stimuli as well. 
Other potential extensions of the pairwise SDME model would include temporal dependencies as in Refs <xref ref-type="bibr" rid="pcbi.1002922-Marre1">[31]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Vasquez1">[49]</xref> or an SDME model where the pairwise interactions are also stimulus dependent. While it is not immediately clear how such dependency would be expressed for the <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e216" xlink:type="simple"/></inline-formula> (unlike the linear filter description of the single cell parameters, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e217" xlink:type="simple"/></inline-formula>'s), such a model would be instrumental for analysis of population adaptation or learning. Another extension would be to include the dependence of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e218" xlink:type="simple"/></inline-formula> on multiple stimulus projections, or to include high-order interaction terms between spikes, which are likely to play an important role for large populations responding to natural stimuli <xref ref-type="bibr" rid="pcbi.1002922-Ohiorhenuan1">[34]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Ganmor2">[35]</xref>. Finally, we also expect that sampling from larger populations, as future experiments will allow, would enable us to give a full characterization of the interaction maps between cells of different classes, which would most likely reflect independence between classes with strong correlations between the cells of the same class, or even stronger correlations at the population level including across different classes; the two alternatives represent an exciting (and still mostly unanswered) question. 
We expect that increasingly detailed statistical models of neural codes, and the efforts to infer such models from experimental data, will allow us to focus our attention on population-level statistics and on finding principled information-theoretic measures for quantifying the code, like the surprise and instantaneous information suggested here.</p>
    </sec>
    <sec id="s4" sec-type="methods">
      <title>Methods</title>
      <sec id="s4a">
        <title>Electrophysiology</title>
        <p>Experiments were performed on the adult tiger salamander, <italic>Ambystoma tigrinum</italic>. All experiments were in accordance with Ben-Gurion University of the Negev and government regulations. Extracted retinas were placed with the ganglion cell layer facing a multielectrode array with 252 electrodes (Ayanda Biosystems, Switzerland), and superfused with oxygenated Ringer medium at room temperature. Extracellularly recorded signals were amplified (MultiChannel Systems, Germany) and digitized at 10 kHz, and spike-sorted using custom software written in MATLAB.</p>
      </sec>
      <sec id="s4b">
        <title>Visual stimulation</title>
        <p>Stimuli were projected onto the retina from a CRT video monitor (ViewSonic G90fB) at a frame rate of 60 Hz; each movie frame was presented twice, using standard optics. Full Field Flicker (FFF) stimuli were generated by independently sampling spatially uniform gray levels (with a resolution of 8 bits) from a Gaussian distribution, with a mean luminance of 147 lux and a standard deviation of 33 lux. These data allow us to estimate the entropy rate of the source (as used in the main text), by multiplying the entropy of the luminance distribution with the refresh rate. To estimate the cells' receptive fields, a checkerboard stimulus was generated by selecting each checker (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e219" xlink:type="simple"/></inline-formula> on the retina) randomly every 33 ms to be either black or white. To identify the RF centers, a two-dimensional Gaussian was fitted to the spatial profile of the response. The movies were gamma corrected for the computer monitor. In all cases the visual stimulus entirely covered the retinal patch that was used for the experiment.</p>
      </sec>
      <sec id="s4c">
        <title>Estimating model statistics from data</title>
        <p>The firing rates of the cells and the overall covariance of the spiking activity are the key statistics for inferring the models we present, so the reliability of our estimates for these quantities is a key systematic issue. Previous work has shown that 10–20 minute recordings give very reliable estimates <xref ref-type="bibr" rid="pcbi.1002922-Ganmor2">[35]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Ganmor3">[68]</xref>, and that training data of similar size allows for reliable estimates of pairwise-maximum-entropy-based models for populations of this size <xref ref-type="bibr" rid="pcbi.1002922-Ganmor3">[68]</xref>. The error on instantaneous firing rate was estimated by splitting 626 repeats into two random halves 50 times, and estimating firing rate for each neuron. The relative error in the firing rate, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e220" xlink:type="simple"/></inline-formula>, estimated as (relative) std over random splits of data, scales tightly with the mean firing rate with the power <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e221" xlink:type="simple"/></inline-formula>, such that at instantaneous rates of about <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e222" xlink:type="simple"/></inline-formula> the error is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e223" xlink:type="simple"/></inline-formula>, at <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e224" xlink:type="simple"/></inline-formula> the error is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e225" xlink:type="simple"/></inline-formula>, and at <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e226" xlink:type="simple"/></inline-formula> the error is <inline-formula><inline-graphic 
xlink:href="info:doi/10.1371/journal.pcbi.1002922.e227" xlink:type="simple"/></inline-formula>. For correlations, we assess their significance by comparing the distribution of real correlation coefficients to the (null) distribution where the spikes for each neuron have been randomized in time. The null distribution is evaluated over one half of the repeats, because this is the data size used for training; the mean of the distribution is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e228" xlink:type="simple"/></inline-formula>, and the std <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e229" xlink:type="simple"/></inline-formula>, making 95% of observed correlations larger than this spread due to sampling. In more detail, the relative error on correlations was assessed by splitting data 50 times randomly into two halves, and seeing that the relative error scales with the value of the correlations <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e230" xlink:type="simple"/></inline-formula>, so that the typical error at significance threshold was about 60%, for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e231" xlink:type="simple"/></inline-formula> (80% of all correlations) it was 18%, for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e232" xlink:type="simple"/></inline-formula> (23% of all correlations) it was 4%, and for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e233" xlink:type="simple"/></inline-formula> it was less than 2%. The average error on significant correlations is slightly below 10%. To quantify the stability of the recordings across time, we computed for each cell the approximate drift in the firing rate, by linearly regressing the average firing rate in each repeat against the repeat index. 
For about half of the cells the relative change in the firing rate across the whole duration of the experiment was below 25% (average 14%), while for 80% of the cells the drift was below 50% (average 24%). To deal with the remaining non-stationarity, we selected as our training data all odd numbered repeats, and for our test data all even numbered repeats, so that the models were trained and tested across the non-stationary behavior.</p>
      </sec>
      <sec id="s4d">
        <title>Inferring SDME from data</title>
        <p>The LN model for each neuron <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e234" xlink:type="simple"/></inline-formula> consists of the linear filter <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e235" xlink:type="simple"/></inline-formula>, and the nonlinear function <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e236" xlink:type="simple"/></inline-formula>, which is defined pointwise on a set of binned values for the generator signal, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e237" xlink:type="simple"/></inline-formula>. We used binning into <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e238" xlink:type="simple"/></inline-formula> bins such that initially each bin contains roughly the same number of values for <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e239" xlink:type="simple"/></inline-formula>, but subsequently the binning is adaptively adjusted (separately for each neuron) to be denser at higher values of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e240" xlink:type="simple"/></inline-formula>, where the firing rates are higher. We fitted LN models with varying number of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e241" xlink:type="simple"/></inline-formula> bins, and have chosen <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e242" xlink:type="simple"/></inline-formula> when the performance of the LN models appeared to saturate <xref ref-type="bibr" rid="pcbi.1002922-GranotAtedgi1">[69]</xref>.</p>
        <p>To find the parameters of the stimulus-dependent maximum entropy model (<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e243" xlink:type="simple"/></inline-formula>), we retained the binning of the generator signal used for LN model construction. Given trial values for the SDME parameters, we estimated the chosen expectation values (covariance matrix <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e244" xlink:type="simple"/></inline-formula> of neural activity, and the firing rate conditional on <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e245" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e246" xlink:type="simple"/></inline-formula>) by Monte Carlo sampling from the trial distribution in <xref ref-type="disp-formula" rid="pcbi.1002922.e083">Eq. (4)</xref>; the learning step of the algorithm is computed by comparing the expectation values in the trial distribution and the empirical distribution (computed over the training half of the stimulus repeats). In detail, we used a gradient ascent algorithm, applying a combination of Gibbs sampling and importance sampling in order to efficiently estimate the gradient, by using optimizations similar to those described in Ref. <xref ref-type="bibr" rid="pcbi.1002922-Broderick1">[70]</xref>. Sampling was carried out in parallel on a 16 node cluster with two 2.66 GHz Intel Quad-Core Xeon processors and 16 GB of memory per node. The calculation was terminated when the average error in firing rates and coincident firing rates reached below 1% and 5% respectively, which is within the experimental error.</p>
        <p>To compute the single neuron PSTH and compare the distributions of codewords from the model to the empirical distribution, we used Metropolis Monte Carlo sampling to draw codewords from the model distributions; we drew 5000 independent samples (to draw uncorrelated configurations, a sample was recorded only after 100 “spin-flip” trials) for every timepoint, for a total of <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e247" xlink:type="simple"/></inline-formula> samples; the same procedure was used also to draw from the conditionally independent (T1,S1) models. To estimate the entropies of high dimensional SDME distributions, we used the “heat capacity integration” method, detailed in Ref <xref ref-type="bibr" rid="pcbi.1002922-Tkaik2">[32]</xref>. Briefly, a maximum entropy model <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e248" xlink:type="simple"/></inline-formula> (where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e249" xlink:type="simple"/></inline-formula> is the Hamiltonian function determined by the choice of constrained operators and the conjugated parameters) is extended by introducing a new parameter <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e250" xlink:type="simple"/></inline-formula>, much like the temperature in physics, so that <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e251" xlink:type="simple"/></inline-formula>. 
The entropy of the distribution is given by <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e252" xlink:type="simple"/></inline-formula>, where the heat capacity <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e253" xlink:type="simple"/></inline-formula>, and the variance in energy can be estimated at each <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e254" xlink:type="simple"/></inline-formula> by Monte Carlo sampling. In practice, we run a separate Monte Carlo sampling for a finely discretized interval of temperatures, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e255" xlink:type="simple"/></inline-formula>, estimate <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e256" xlink:type="simple"/></inline-formula> for each temperature, and numerically integrate to get the entropy <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e257" xlink:type="simple"/></inline-formula>. We have previously shown that this procedure yields robust entropy estimates even for large numbers of neurons <xref ref-type="bibr" rid="pcbi.1002922-Tkaik1">[27]</xref>, <xref ref-type="bibr" rid="pcbi.1002922-Tkaik2">[32]</xref>.</p>
      </sec>
      <sec id="s4e">
        <title>Evaluating the likelihood and goodness of fit</title>
        <p>To evaluate the performance of the models on the testing data, we computed (i) the average per-codeword log-likelihood (reported in the <xref ref-type="sec" rid="s2">Results</xref> section), and (ii) the GoF (goodness-of-fit) figure, reported in <xref ref-type="fig" rid="pcbi-1002922-g006">Fig. 6</xref>. Regarding (i), for model <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e258" xlink:type="simple"/></inline-formula> the log-likelihood is <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e259" xlink:type="simple"/></inline-formula>, where the average is over all testing repeats <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e260" xlink:type="simple"/></inline-formula> and all times <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e261" xlink:type="simple"/></inline-formula>. For models S1, S2, the evaluation is straightforward. For the T1 model, there is a problem whenever the firing rate of a neuron in the training set is 0, which leads to undefined log likelihoods. To address this, we add a small regularizer <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e262" xlink:type="simple"/></inline-formula> to the estimated firing rates that define the T1 model, and choose <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e263" xlink:type="simple"/></inline-formula> to maximize the log-likelihood of T1 on the test set, thus giving maximal possible advantage to the T1. We also created two models by empirically sampling the frequencies of codewords on training (testing) data. Sampling the frequencies on testing data and evaluating on testing data gives the absolute lower bound to the log likelihood. When the frequencies are sampled on training data, we again face a possible problem for codewords whose empirical probability is 0, but which occur in test data. 
We introduce a pseudocount regularizer to give these codewords non-zero probability, and set the regularizer to maximize the log-likelihood on testing data, again maximally favoring this model. Regarding (ii), we compute the GoF (goodness-of-fit) figure as std<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e264" xlink:type="simple"/></inline-formula>, where <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e265" xlink:type="simple"/></inline-formula>. <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e266" xlink:type="simple"/></inline-formula> is the empirical probability of a codeword on the test set, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e267" xlink:type="simple"/></inline-formula> is its model probability, <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e268" xlink:type="simple"/></inline-formula> is the expected error on <inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pcbi.1002922.e269" xlink:type="simple"/></inline-formula>, computed from the multinomial variance for every codeword given its empirical probability, and the std is taken over all non-silent patterns of the test set plotted in <xref ref-type="fig" rid="pcbi-1002922-g006">Fig. 6</xref>, top row.</p>
      </sec>
    </sec>
  </body>
  <back>
    <ref-list>
      <title>References</title>
      <ref id="pcbi.1002922-Rieke1">
        <label>1</label>
        <mixed-citation publication-type="other" xlink:type="simple">Rieke F, Warland D, de Ruyter van Steveninck RR, Bialek W (1996) Spikes: Exploring the Neural Code. Cambridge: MIT Press. 395 p.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-AgerayArcas1">
        <label>2</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Agüera y Arcas</surname><given-names>B</given-names></name>, <name name-style="western"><surname>Fairhall</surname><given-names>AL</given-names></name> (<year>2003</year>) <article-title>What causes a neuron to spike?</article-title> <source>Neural Comput</source> <volume>15</volume>: <fpage>1789</fpage>–<lpage>1807</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Bialek1">
        <label>3</label>
        <mixed-citation publication-type="other" xlink:type="simple">Bialek W, de Ruyter van Steveninck RR (2005) Features and dimensions: Motion estimation in fly vision. <italic>arXiv.org</italic>:q-bio/0505003.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Schwartz1">
        <label>4</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schwartz</surname><given-names>O</given-names></name>, <name name-style="western"><surname>Pillow</surname><given-names>JW</given-names></name>, <name name-style="western"><surname>Rust</surname><given-names>NC</given-names></name>, <name name-style="western"><surname>Simoncelli</surname><given-names>EP</given-names></name> (<year>2006</year>) <article-title>Spike-triggered neural characterization</article-title>. <source>J Vis</source> <volume>6</volume>: <fpage>484</fpage>–<lpage>507</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Stopfer1">
        <label>5</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Stopfer</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Bhagavan</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Smith</surname><given-names>BH</given-names></name>, <name name-style="western"><surname>Laurent</surname><given-names>G</given-names></name> (<year>1997</year>) <article-title>Impaired odour discrimination on desynchronization of odour-encoding neural assemblies</article-title>. <source>Nature</source> <volume>390</volume>: <fpage>70</fpage>–<lpage>4</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Riehle1">
        <label>6</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Riehle</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Grün</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Diesmann</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Aertsen</surname><given-names>A</given-names></name> (<year>1997</year>) <article-title>Spike synchronization and rate modulation differentially involved in motor cortical function</article-title>. <source>Science</source> <volume>278</volume>: <fpage>1950</fpage>–<lpage>3</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Harris1">
        <label>7</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Harris</surname><given-names>KD</given-names></name>, <name name-style="western"><surname>Csicsvari</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Hirase</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Dragoi</surname><given-names>G</given-names></name>, <name name-style="western"><surname>Buzsáki</surname><given-names>G</given-names></name> (<year>2003</year>) <article-title>Organization of cell assemblies in the hippocampus</article-title>. <source>Nature</source> <volume>424</volume>: <fpage>552</fpage>–<lpage>6</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Averbeck1">
        <label>8</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Averbeck</surname><given-names>BB</given-names></name>, <name name-style="western"><surname>Lee</surname><given-names>D</given-names></name> (<year>2004</year>) <article-title>Coding and transmission of information by neural ensembles</article-title>. <source>Trends Neurosci</source> <volume>27</volume>: <fpage>225</fpage>–<lpage>30</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Brunel1">
        <label>9</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Brunel</surname><given-names>N</given-names></name>, <name name-style="western"><surname>Nadal</surname><given-names>JP</given-names></name> (<year>1998</year>) <article-title>Mutual information, Fisher information, and population coding</article-title>. <source>Neural Comp</source> <volume>10</volume>: <fpage>1731</fpage>–<lpage>1757</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Abbott1">
        <label>10</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Abbott</surname><given-names>LF</given-names></name>, <name name-style="western"><surname>Dayan</surname><given-names>P</given-names></name> (<year>1998</year>) <article-title>The Effect of Correlated Variability on the Accuracy of a Population Code</article-title>. <source>Neural Comp</source> <volume>11</volume>: <fpage>91</fpage>–<lpage>102</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Sompolinsky1">
        <label>11</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sompolinsky</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Yoon</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Kang</surname><given-names>K</given-names></name>, <name name-style="western"><surname>Shamir</surname><given-names>M</given-names></name> (<year>2001</year>) <article-title>Population coding in neuronal systems with correlated noise</article-title>. <source>Phys Rev E</source> <volume>64</volume>: <fpage>8095</fpage>–<lpage>8100</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Schneidman1">
        <label>12</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schneidman</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Bialek</surname><given-names>W</given-names></name>, <name name-style="western"><surname>Berry</surname><given-names>MJ</given-names><suffix>2nd</suffix></name> (<year>2003</year>) <article-title>Synergy, redundancy, and independence in population codes</article-title>. <source>J Neurosci</source> <volume>23</volume>: <fpage>11539</fpage>–<lpage>53</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Pola1">
        <label>13</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pola</surname><given-names>G</given-names></name>, <name name-style="western"><surname>Thiele</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Hoffmann</surname><given-names>K-P</given-names></name>, <name name-style="western"><surname>Panzeri</surname><given-names>S</given-names></name> (<year>2003</year>) <article-title>An exact method to quantify the information transmitted by different mechanisms of correlational coding</article-title>. <source>Network: Comput Neural Syst</source> <volume>14</volume>: <fpage>35</fpage>–<lpage>60</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Nirenberg1">
        <label>14</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Nirenberg</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Latham</surname><given-names>PE</given-names></name> (<year>2003</year>) <article-title>Decoding neuronal spike trains: How important are correlations?</article-title> <source>Proc Natl Acad Sci USA</source> <volume>100</volume>: <fpage>7348</fpage>–<lpage>7353</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Averbeck2">
        <label>15</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Averbeck</surname><given-names>B</given-names></name>, <name name-style="western"><surname>Latham</surname><given-names>PE</given-names></name>, <name name-style="western"><surname>Pouget</surname><given-names>A</given-names></name> (<year>2006</year>) <article-title>Neural correlations, population coding and computation</article-title>. <source>Nat Rev Neurosci</source> <volume>7</volume>: <fpage>358</fpage>–<lpage>366</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Bair1">
        <label>16</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bair</surname><given-names>W</given-names></name>, <name name-style="western"><surname>Zohary</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Newsome</surname><given-names>WT</given-names></name> (<year>2001</year>) <article-title>Correlated firing in macaque visual area MT: time scales and relationship to behavior</article-title>. <source>J Neurosci</source> <volume>21</volume>: <fpage>1676</fpage>–<lpage>97</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Ecker1">
        <label>17</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ecker</surname><given-names>AS</given-names></name>, <name name-style="western"><surname>Berens</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Keliris</surname><given-names>GA</given-names></name>, <name name-style="western"><surname>Bethge</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Logothetis</surname><given-names>NK</given-names></name>, <etal>et al</etal>. (<year>2010</year>) <article-title>Decorrelated neuronal firing in cortical microcircuits</article-title>. <source>Science</source> <volume>327</volume>: <fpage>584</fpage>–<lpage>7</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Schneidman2">
        <label>18</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schneidman</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Berry</surname><given-names>MJ</given-names><suffix>2nd</suffix></name>, <name name-style="western"><surname>Segev</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Bialek</surname><given-names>W</given-names></name> (<year>2006</year>) <article-title>Weak pairwise correlations imply strongly correlated network states in a neural population</article-title>. <source>Nature</source> <volume>440</volume>: <fpage>1007</fpage>–<lpage>12</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Puchalla1">
        <label>19</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Puchalla</surname><given-names>JL</given-names></name>, <name name-style="western"><surname>Schneidman</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Harris</surname><given-names>RA</given-names></name>, <name name-style="western"><surname>Berry</surname><given-names>MJ</given-names><suffix>2nd</suffix></name> (<year>2005</year>) <article-title>Redundancy in the population code of the retina</article-title>. <source>Neuron</source> <volume>46</volume>: <fpage>493</fpage>–<lpage>504</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Narayanan1">
        <label>20</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Narayanan</surname><given-names>NS</given-names></name>, <name name-style="western"><surname>Kimchi</surname><given-names>EY</given-names></name>, <name name-style="western"><surname>Laubach</surname><given-names>M</given-names></name> (<year>2005</year>) <article-title>Redundancy and synergy of neuronal ensembles in motor cortex</article-title>. <source>J Neurosci</source> <volume>25</volume>: <fpage>4207</fpage>–<lpage>16</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Chechik1">
        <label>21</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chechik</surname><given-names>G</given-names></name>, <name name-style="western"><surname>Anderson</surname><given-names>MJ</given-names></name>, <name name-style="western"><surname>Bar-Yosef</surname><given-names>O</given-names></name>, <name name-style="western"><surname>Young</surname><given-names>ED</given-names></name>, <name name-style="western"><surname>Tishby</surname><given-names>N</given-names></name>, <etal>et al</etal>. (<year>2006</year>) <article-title>Reduction of information redundancy in the ascending auditory pathway</article-title>. <source>Neuron</source> <volume>51</volume>: <fpage>359</fpage>–<lpage>68</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Nirenberg2">
        <label>22</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Nirenberg</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Carcieri</surname><given-names>SM</given-names></name>, <name name-style="western"><surname>Jacobs</surname><given-names>AL</given-names></name>, <name name-style="western"><surname>Latham</surname><given-names>PE</given-names></name> (<year>2001</year>) <article-title>Retinal ganglion cells act largely as independent encoders</article-title>. <source>Nature</source> <volume>411</volume>: <fpage>698</fpage>–<lpage>701</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Barlow1">
        <label>23</label>
        <mixed-citation publication-type="other" xlink:type="simple">Barlow HB (1961) Possible principles underlying the transformation of sensory messages. In: Rosenblith W, editor. Sensory communication. Cambridge: MIT Press. pp 217–234.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Atick1">
        <label>24</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Atick</surname><given-names>JJ</given-names></name>, <name name-style="western"><surname>Redlich</surname><given-names>AN</given-names></name> (<year>1990</year>) <article-title>Towards a theory of early visual processing</article-title>. <source>Neural Comp</source> <volume>2</volume>: <fpage>308</fpage>–<lpage>320</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Barlow2">
        <label>25</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Barlow</surname><given-names>H</given-names></name> (<year>2001</year>) <article-title>Redundancy reduction revisited</article-title>. <source>Network: Comput Neural Syst</source> <volume>12</volume>: <fpage>241</fpage>–<lpage>53</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Schnitzer1">
        <label>26</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schnitzer</surname><given-names>MJ</given-names></name>, <name name-style="western"><surname>Meister</surname><given-names>M</given-names></name> (<year>2003</year>) <article-title>Multineuronal firing patterns in the signal from eye to brain</article-title>. <source>Neuron</source> <volume>37</volume>: <fpage>499</fpage>–<lpage>511</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Tkaik1">
        <label>27</label>
        <mixed-citation publication-type="other" xlink:type="simple">Tkačik G, Schneidman E, Berry MJ 2nd, Bialek W (2006) Ising models for networks of real neurons. <italic>arXiv.org</italic>: q-bio/0611072.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Shlens1">
        <label>28</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Shlens</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Field</surname><given-names>GD</given-names></name>, <name name-style="western"><surname>Gauthier</surname><given-names>JL</given-names></name>, <name name-style="western"><surname>Grivich</surname><given-names>MI</given-names></name>, <name name-style="western"><surname>Petrusca</surname><given-names>D</given-names></name>, <etal>et al</etal>. (<year>2006</year>) <article-title>The structure of multi-neuron firing patterns in primate retina</article-title>. <source>J Neurosci</source> <volume>26</volume>: <fpage>8254</fpage>–<lpage>66</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Tang1">
        <label>29</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Tang</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Jackson</surname><given-names>D</given-names></name>, <name name-style="western"><surname>Hobbs</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Chen</surname><given-names>W</given-names></name>, <name name-style="western"><surname>Smith</surname><given-names>JL</given-names></name>, <etal>et al</etal>. (<year>2008</year>) <article-title>A maximum entropy model applied to spatial and temporal correlations from cortical networks <italic>in vitro</italic></article-title>. <source>J Neurosci</source> <volume>28</volume>: <fpage>505</fpage>–<lpage>518</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Shlens2">
        <label>30</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Shlens</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Field</surname><given-names>GD</given-names></name>, <name name-style="western"><surname>Gauthier</surname><given-names>JL</given-names></name>, <name name-style="western"><surname>Greschner</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Sher</surname><given-names>A</given-names></name>, <etal>et al</etal>. (<year>2009</year>) <article-title>The structure of large-scale synchronized firing in primate retina</article-title>. <source>J Neurosci</source> <volume>29</volume>: <fpage>5022</fpage>–<lpage>31</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Marre1">
        <label>31</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Marre</surname><given-names>O</given-names></name>, <name name-style="western"><surname>Boustani</surname><given-names>SE</given-names></name>, <name name-style="western"><surname>Fregnac</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Destexhe</surname><given-names>A</given-names></name> (<year>2009</year>) <article-title>Prediction of spatio–temporal patterns of neural activity from pairwise correlations</article-title>. <source>Phys Rev Lett</source> <volume>102</volume>: <fpage>138101</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Tkaik2">
        <label>32</label>
        <mixed-citation publication-type="other" xlink:type="simple">Tkačik G, Schneidman E, Berry MJ 2nd, Bialek W (2009) Spin-glass models for a network of real neurons. <italic>arXiv.org</italic>: 0912.5409 (2009).</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Ganmor1">
        <label>33</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ganmor</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Segev</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Schneidman</surname><given-names>E</given-names></name> (<year>2011</year>) <article-title>The architecture of functional interaction networks in the retina</article-title>. <source>J Neurosci</source> <volume>31</volume>: <fpage>3044</fpage>–<lpage>54</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Ohiorhenuan1">
        <label>34</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ohiorhenuan</surname><given-names>IE</given-names></name>, <name name-style="western"><surname>Mechler</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Purpura</surname><given-names>KP</given-names></name>, <name name-style="western"><surname>Schmid</surname><given-names>AM</given-names></name>, <name name-style="western"><surname>Hu</surname><given-names>Q</given-names></name>, <etal>et al</etal>. (<year>2010</year>) <article-title>Sparse coding and high-order correlations in fine-scale cortical networks</article-title>. <source>Nature</source> <volume>466</volume>: <fpage>617</fpage>–<lpage>21</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Ganmor2">
        <label>35</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ganmor</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Segev</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Schneidman</surname><given-names>E</given-names></name> (<year>2011</year>) <article-title>Sparse low-order interaction network underlies a highly correlated and learnable neural population code</article-title>. <source>Proc Natl Acad Sci USA</source> <volume>108</volume>: <fpage>9679</fpage>–<lpage>84</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Tkaik3">
        <label>36</label>
        <mixed-citation publication-type="other" xlink:type="simple">Tkačik G, Marre O, Mora T, Amodei D, Berry MJ 2nd, <etal>et al</etal>. (2012) The simplest maximum entropy model for collective behavior in a neural network. <italic>arXiv.org</italic>: 1207.6319. J Stat Mech, in press.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Warland1">
        <label>37</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Warland</surname><given-names>DK</given-names></name>, <name name-style="western"><surname>Reinagel</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Meister</surname><given-names>M</given-names></name> (<year>1997</year>) <article-title>Decoding visual information from a population of retinal ganglion cells</article-title>. <source>J Neurophysiol</source> <volume>78</volume>: <fpage>2336</fpage>–<lpage>2350</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Dan1">
        <label>38</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Dan</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Alonso</surname><given-names>JM</given-names></name>, <name name-style="western"><surname>Usrey</surname><given-names>WM</given-names></name>, <name name-style="western"><surname>Reid</surname><given-names>RC</given-names></name> (<year>1998</year>) <article-title>Coding of visual information by precisely correlated spikes in the lateral geniculate nucleus</article-title>. <source>Nat Neurosci</source> <volume>1</volume>: <fpage>501</fpage>–<lpage>7</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Hatsopoulos1">
        <label>39</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hatsopoulos</surname><given-names>NG</given-names></name>, <name name-style="western"><surname>Ojakangas</surname><given-names>CL</given-names></name>, <name name-style="western"><surname>Paninski</surname><given-names>L</given-names></name>, <name name-style="western"><surname>Donoghue</surname><given-names>JP</given-names></name> (<year>1998</year>) <article-title>Information about movement direction obtained from synchronous activity of motor cortical neurons</article-title>. <source>Proc Natl Acad Sci USA</source> <volume>95</volume>: <fpage>15706</fpage>–<lpage>11</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Brown1">
        <label>40</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Brown</surname><given-names>EN</given-names></name>, <name name-style="western"><surname>Frank</surname><given-names>LM</given-names></name>, <name name-style="western"><surname>Tang</surname><given-names>D</given-names></name>, <name name-style="western"><surname>Quirk</surname><given-names>MC</given-names></name>, <name name-style="western"><surname>Wilson</surname><given-names>MA</given-names></name> (<year>1998</year>) <article-title>A statistical paradigm for neural spike train decoding applied to position prediction from ensemble firing patterns of rat hippocampal place cells</article-title>. <source>J Neurosci</source> <volume>18</volume>: <fpage>7411</fpage>–<lpage>25</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Pillow1">
        <label>41</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pillow</surname><given-names>JW</given-names></name>, <name name-style="western"><surname>Shlens</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Paninski</surname><given-names>L</given-names></name>, <name name-style="western"><surname>Sher</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Litke</surname><given-names>AM</given-names></name>, <etal>et al</etal>. (<year>2008</year>) <article-title>Spatio-temporal correlations and visual signaling in a complete neural population</article-title>. <source>Nature</source> <volume>454</volume>: <fpage>995</fpage>–<lpage>9</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Schneidman3">
        <label>42</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schneidman</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Still</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Berry</surname><given-names>MJ</given-names><suffix>2nd</suffix></name>, <name name-style="western"><surname>Bialek</surname><given-names>W</given-names></name> (<year>2003</year>) <article-title>Network information and connected correlations</article-title>. <source>Phys Rev Lett</source> <volume>91</volume>: <fpage>238701</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Cocco1">
        <label>43</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cocco</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Leibler</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Monasson</surname><given-names>R</given-names></name> (<year>2009</year>) <article-title>Neuronal couplings between retinal ganglion cells inferred by efficient inverse statistical physics methods</article-title>. <source>Proc Natl Acad Sci USA</source> <volume>106</volume>: <fpage>14058</fpage>–<lpage>62</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Cocco2">
        <label>44</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cocco</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Monasson</surname><given-names>R</given-names></name> (<year>2011</year>) <article-title>Adaptive cluster expansion for inferring Boltzmann machines with noisy data</article-title>. <source>Phys Rev Lett</source> <volume>106</volume>: <fpage>090601</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Roudi1">
        <label>45</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Roudi</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Aurell</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Hertz</surname><given-names>JA</given-names></name> (<year>2009</year>) <article-title>Statistical physics of pairwise probability models</article-title>. <source>Front Comput Neurosci</source> <volume>3</volume>: <fpage>22</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Roudi2">
        <label>46</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Roudi</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Nirenberg</surname><given-names>S</given-names></name>, <name name-style="western"><surname>Latham</surname><given-names>PE</given-names></name> (<year>2009</year>) <article-title>Pairwise maximum entropy models for studying large biological systems: when they can work and when they can't</article-title>. <source>PLoS Comput Biol</source> <volume>5</volume>: <fpage>e1000380</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Roudi3">
        <label>47</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Roudi</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Tyrcha</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Hertz</surname><given-names>J</given-names></name> (<year>2009</year>) <article-title>The Ising model for neural data: model quality and approximate methods for extracting functional connectivity</article-title>. <source>Phys Rev E</source> <volume>79</volume>: <fpage>051915</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Roudi4">
        <label>48</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Roudi</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Hertz</surname><given-names>J</given-names></name> (<year>2011</year>) <article-title>Mean field theory for nonequilibrium network reconstruction</article-title>. <source>Phys Rev Lett</source> <volume>106</volume>: <fpage>048702</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Vasquez1">
        <label>49</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Vasquez</surname><given-names>JC</given-names></name>, <name name-style="western"><surname>Marre</surname><given-names>O</given-names></name>, <name name-style="western"><surname>Palacios</surname><given-names>AG</given-names></name>, <name name-style="western"><surname>Berry</surname><given-names>MJ</given-names><suffix>2nd</suffix></name>, <name name-style="western"><surname>Cessac</surname><given-names>B</given-names></name> (<year>2012</year>) <article-title>Gibbs distribution analysis of temporal correlations structure in retina ganglion cells</article-title>. <source>J Physiol Paris</source> <volume>106</volume>: <fpage>120</fpage>–<lpage>7</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Macke1">
        <label>50</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Macke</surname><given-names>JH</given-names></name>, <name name-style="western"><surname>Opper</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Bethge</surname><given-names>M</given-names></name> (<year>2011</year>) <article-title>Common input explains higher-order correlations and entropy in a simple model of neural population activity</article-title>. <source>Phys Rev Lett</source> <volume>106</volume>: <fpage>208102</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Mezard1">
        <label>51</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Mezard</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Mora</surname><given-names>T</given-names></name> (<year>2009</year>) <article-title>Constraint satisfaction problems and neural networks: a statistical physics perspective</article-title>. <source>J Physiol Paris</source> <volume>103</volume>: <fpage>107</fpage>–<lpage>113</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Cessac1">
        <label>52</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cessac</surname><given-names>B</given-names></name>, <name name-style="western"><surname>Rostro</surname><given-names>H</given-names></name>, <name name-style="western"><surname>Vasquez</surname><given-names>JC</given-names></name>, <name name-style="western"><surname>Vieville</surname><given-names>T</given-names></name> (<year>2009</year>) <article-title>How Gibbs distributions may naturally arise from synaptic adaptation mechanisms</article-title>. <source>J Stat Phys</source> <volume>136</volume>: <fpage>565</fpage>–<lpage>602</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Sessak1">
        <label>53</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sessak</surname><given-names>V</given-names></name>, <name name-style="western"><surname>Monasson</surname><given-names>R</given-names></name> (<year>2009</year>) <article-title>Small-correlation expansions for the inverse Ising problem</article-title>. <source>J Phys A</source> <volume>42</volume>: <fpage>055001</fpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Segev1">
        <label>54</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Segev</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Goodhouse</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Puchalla</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Berry</surname><given-names>MJ</given-names><suffix>2nd</suffix></name> (<year>2004</year>) <article-title>Recording spikes from a large fraction of the ganglion cells in a retinal patch</article-title>. <source>Nat Neurosci</source> <volume>7</volume>: <fpage>1154</fpage>–<lpage>61</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Fairhall1">
        <label>55</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Fairhall</surname><given-names>AL</given-names></name>, <name name-style="western"><surname>Burlingame</surname><given-names>CA</given-names></name>, <name name-style="western"><surname>Narasimhan</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Harris</surname><given-names>RA</given-names></name>, <name name-style="western"><surname>Puchalla</surname><given-names>JL</given-names></name>, <etal>et al</etal>. (<year>2006</year>) <article-title>Selectivity for multiple stimulus features in retinal ganglion cells</article-title>. <source>J Neurophysiol</source> <volume>96</volume>: <fpage>2724</fpage>–<lpage>2738</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Tkaik4">
        <label>56</label>
        <mixed-citation publication-type="other" xlink:type="simple">Tkačik G, Ghosh A, Schneidman E, Segev R (2012) Retinal adaptation and invariance to changes in higher-order stimulus statistics. <italic>arXiv.org</italic>: 1201.3552.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Keat1">
        <label>57</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Keat</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Reinagel</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Reid</surname><given-names>RC</given-names></name>, <name name-style="western"><surname>Meister</surname><given-names>M</given-names></name> (<year>2001</year>) <article-title>Predicting every spike: a model for the responses of visual neurons</article-title>. <source>Neuron</source> <volume>30</volume>: <fpage>803</fpage>–<lpage>817</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Ozuysal1">
        <label>58</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ozuysal</surname><given-names>Y</given-names></name>, <name name-style="western"><surname>Baccus</surname><given-names>SA</given-names></name> (<year>2012</year>) <article-title>Linking the computational structure of variance adaptation to biophysical mechanisms</article-title>. <source>Neuron</source> <volume>73</volume>: <fpage>1002</fpage>–<lpage>1015</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Tkaik5">
        <label>59</label>
        <mixed-citation publication-type="other" xlink:type="simple">Tkačik G (2007) Information flow in biological networks. PhD Dissertation. Princeton (New Jersey, USA): Department of Physics, Princeton University. 157 p.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-GranotAtdegi1">
        <label>60</label>
        <mixed-citation publication-type="other" xlink:type="simple">Granot-Atedgi E, Tkačik G, Segev R, Schneidman E (2010) A stimulus-dependent maximum entropy model of the retinal population neural code [Abstract]. In: Front Neurosci Conference Abstract; 25–28 February 2010; Salt Lake City, Utah, United States. COSYNE 2010.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Jaynes1">
        <label>61</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Jaynes</surname><given-names>ET</given-names></name> (<year>1957</year>) <article-title>Information theory and statistical mechanics</article-title>. <source>Phys Rev</source> <volume>106</volume>: <fpage>620</fpage>–<lpage>630</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Tkaik6">
        <label>62</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Tkačik</surname><given-names>G</given-names></name>, <name name-style="western"><surname>Prentice</surname><given-names>JS</given-names></name>, <name name-style="western"><surname>Balasubramanian</surname><given-names>V</given-names></name>, <name name-style="western"><surname>Schneidman</surname><given-names>E</given-names></name> (<year>2010</year>) <article-title>Optimal population coding by noisy spiking neurons</article-title>. <source>Proc Nat'l Acad Sci USA</source> <volume>107</volume>: <fpage>14419</fpage>–<lpage>14424</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Hopfield1">
        <label>63</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hopfield</surname><given-names>JJ</given-names></name> (<year>1982</year>) <article-title>Neural networks and physical systems with emergent collective computational abilities</article-title>. <source>Proc Nat'l Acad Sci USA</source> <volume>79</volume>: <fpage>2554</fpage>–<lpage>2558</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-DeWeese1">
        <label>64</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>DeWeese</surname><given-names>MR</given-names></name>, <name name-style="western"><surname>Meister</surname><given-names>M</given-names></name> (<year>1999</year>) <article-title>How to measure the information gained from one symbol</article-title>. <source>Network</source> <volume>10</volume>: <fpage>325</fpage>–<lpage>340</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Strong1">
        <label>65</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Strong</surname><given-names>SP</given-names></name>, <name name-style="western"><surname>Koberle</surname><given-names>R</given-names></name>, <name name-style="western"><surname>de Ruyter van Steveninck</surname><given-names>RR</given-names></name>, <name name-style="western"><surname>Bialek</surname><given-names>W</given-names></name> (<year>1998</year>) <article-title>Entropy and information in neural spike trains</article-title>. <source>Phys Rev Lett</source> <volume>80</volume>: <fpage>197</fpage>–<lpage>200</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Lee1">
        <label>66</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lee</surname><given-names>J</given-names></name> (<year>1993</year>) <article-title>New Monte Carlo algorithm: entropic sampling</article-title>. <source>Phys Rev Lett</source> <volume>71</volume>: <fpage>211</fpage>–<lpage>214</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Berkes1">
        <label>67</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Berkes</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Orban</surname><given-names>G</given-names></name>, <name name-style="western"><surname>Lengyel</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Fiser</surname><given-names>J</given-names></name> (<year>2011</year>) <article-title>Spontaneous cortical activity reveals hallmarks of an optimal internal model of the environment</article-title>. <source>Science</source> <volume>331</volume>: <fpage>83</fpage>–<lpage>87</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Ganmor3">
        <label>68</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ganmor</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Segev</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Schneidman</surname><given-names>E</given-names></name> (<year>2009</year>) <article-title>How fast can we learn maximum entropy models of neural populations?</article-title> <source>J Phys Conf Ser</source> <volume>197</volume>: 012020, <fpage>1</fpage>–<lpage>9</lpage>.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-GranotAtedgi1">
        <label>69</label>
        <mixed-citation publication-type="other" xlink:type="simple">Granot-Atedgi E (2009) Stimulus-dependent maximum entropy models and decoding of naturalistic movies from large populations of retinal neurons [MSc Thesis]. Rehovot (Israel): Neurobiology Department, Weizmann Institute of Science. 70 p.</mixed-citation>
      </ref>
      <ref id="pcbi.1002922-Broderick1">
        <label>70</label>
        <mixed-citation publication-type="other" xlink:type="simple">Broderick T, Dudik M, Tkačik G, Schapire RE, Bialek W (2007) Faster solutions of the inverse pairwise Ising problem. <italic>arXiv.org</italic>: 0712.2437.</mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>