/*
 * Subversion Repositories planix.SVN
 * Rev 2 | Blame | Compare with Previous | Last modification | View Log | RSS feed
 */

/*
 * nvidia tegra 2 architecture-specific stuff
 */

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
#include "io.h"
#include "arm.h"

#include "../port/netif.h"
#include "etherif.h"
#include "../port/flashif.h"
#include "../port/usb.h"
#include "../port/portusbehci.h"
#include "usbehci.h"

enum {
        /* non-zero enables l1diag and the extra iprints in cpustart */
        Debug           = 0,

        /* hardware limits imposed by register contents or layouts */
        Maxflowcpus     = 2,    /* asserted before picking a Flow cpu register */
        Maxcpus         = 4,    /* asserted before shifting per-cpu Clkrst bits */
};

/* short names for the register-layout and diagnostic structs defined below */
typedef struct Clkrst Clkrst;
typedef struct Diag Diag;
typedef struct Flow Flow;
typedef struct Scu Scu;
typedef struct Power Power;

/*
 * clock & reset controller registers, addressed through soc.clkrst.
 * the pad arrays place the named registers at the byte offsets noted;
 * every padding member has a distinct name (the original declared
 * _pad1 twice, which standard C rejects as a duplicate member).
 */
struct Clkrst {
        ulong   rstsrc;                 /* 0x00 */
        ulong   rstdevl;                /* 0x04 */
        ulong   rstdevh;                /* 0x08 */
        ulong   rstdevu;                /* 0x0c */

        ulong   clkoutl;                /* 0x10 */
        ulong   clkouth;                /* 0x14 */
        ulong   clkoutu;                /* 0x18 */

        uchar   _pad0[0x24-0x1c];
        ulong   supcclkdiv;             /* 0x24: super cclk divider */
        ulong   _pad1;
        ulong   supsclkdiv;             /* 0x2c: super sclk divider */

        uchar   _pad4[0x4c-0x30];
        ulong   clkcpu;                 /* 0x4c */

        uchar   _pad3[0xe0-0x50];       /* was a second `_pad1' */
        ulong   pllxbase;               /* 0xe0: pllx controls CPU clock speed */
        ulong   pllxmisc;
        ulong   pllebase;               /* 0xe8: plle is dedicated to pcie */
        ulong   pllemisc;

        uchar   _pad2[0x340-0xf0];
        ulong   cpuset;                 /* 0x340: written by stopcpu */
        ulong   cpuclr;                 /* 0x344: written by unfreeze */
};

/* bit definitions for Clkrst registers, listed in ascending bit order */
enum {
        /* rstsrc bits */
        Wdcpurst        = 1<<0,
        Wdcoprst        = 1<<1,
        Wdsysrst        = 1<<2,
        Wdsel           = 1<<4,         /* tmr1 or tmr2? */
        Wdena           = 1<<5,

        /* devl bits */
        Sysreset        = 1<<2,

        /* clkcpu bits */
        Cpu0stop        = 1<<8,
        Cpu1stop        = 1<<9,

        /* cpu* bits; the Cpu0 forms are shifted left by the cpu number */
        Cpu0reset       = 1<<0,
        Cpu1reset       = 1<<1,
        Cpu0dereset     = 1<<4,
        Cpu1dereset     = 1<<5,
        Cpu0wdreset     = 1<<8,
        Cpu1wdreset     = 1<<9,
        Cpu0dbgreset    = 1<<12,
        Cpu1dbgreset    = 1<<13,
};

/*
 * power-management controller registers, addressed through soc.power.
 * every member is a 32-bit word in register order; cpustart reads gatests.
 */
struct Power {
        ulong   ctl;                    /* mainly for rtc clock signals */
        ulong   secregdis;
        ulong   swrst;

        ulong   wakevmask;
        ulong   waklvl;
        ulong   waksts;
        ulong   swwaksts;

        ulong   dpdpadsovr;             /* deep power down pads override */
        ulong   dpdsample;
        ulong   dpden;

        ulong   gatetimroff;
        ulong   gatetimron;
        ulong   toggle;
        ulong   unclamp;
        ulong   gatests;                /* ro */

        ulong   goodtmr;
        ulong   blinktmr;

        ulong   noiopwr;
        ulong   detect;
        ulong   detlatch;

        ulong   scratch[24];
        ulong   secscratch[6];

        ulong   cpupwrgoodtmr;
        ulong   cpupwrofftmr;

        ulong   pgmask[2];

        ulong   autowaklvl;
        ulong   autowaklvlmask;
        ulong   wakdelay;

        ulong   detval;
        ulong   ddr;
        ulong   usbdebdel;      /* usb de-bounce delay */
        ulong   usbao;
        ulong   cryptoop;
        ulong   pllpwb0ovr;
        ulong   scratch24[42-24+1];     /* 19 words: scratch 24 through 42 */
        ulong   boundoutmirr[3];
        ulong   sys33ven;
        ulong   boundoutmirracc;
        ulong   gate;
};

/* constants for the Power registers */
enum {
        /* partition ids */
        Partpcie        = 3,
        Partl2          = 4,

        /* toggle bits */
        Start           = 1<<8,
};

/*
 * snoop control unit registers, addressed through soc.scu.
 * the pad arrays place filtstart at offset 0x40 and accctl at 0x50.
 */
struct Scu {
        ulong   ctl;
        ulong   cfg;                    /* ro */
        ulong   cpupwrsts;
        ulong   inval;

        uchar   _pad0[0x40-0x10];
        ulong   filtstart;
        ulong   filtend;

        uchar   _pad1[0x50-0x48];
        ulong   accctl;                 /* initially 0 */
        ulong   nsaccctl;
};

/* bits of Scu.ctl; scuon sets Scuenable, Scuparity and Specfill */
enum {
        /* ctl bits */
        Scuenable       = 1<<0,
        Filter          = 1<<1,
        Scuparity       = 1<<2,
        Specfill        = 1<<3,         /* only for PL310 */
        Allport0        = 1<<4,
        Standby         = 1<<5,
        Icstandby       = 1<<6,
};

/*
 * flow controller registers, addressed through soc.flow.
 * stopcpu and unfreeze write the halt* and cpu*/cop words;
 * only cpu0 and cpu1 have registers here (see Maxflowcpus).
 */
struct Flow {
        ulong   haltcpu0;
        ulong   haltcop;
        ulong   cpu0;
        ulong   cop;
        ulong   xrq;
        ulong   haltcpu1;
        ulong   cpu1;
};

/* bit definitions for the Flow registers */
enum {
        /* haltcpu* bits */
        Stop                    = 2<<29,

        /* cpu* bits, in ascending bit order */
        Cpuenable               = 1<<0,
        Eventenable             = 1<<1,
        Waitwfebitsshift        = 4,
        Waitwfebitsmask         = MASK(2),
        Event                   = 1<<14,        /* w1c */
};

/*
 * state shared by all cpus while running l1diag/pass1.
 * the unnamed Lock member lets a Diag pointer be passed to ilock/iunlock.
 * c0 and c1 bracket the live fields, presumably to keep them off cache
 * lines shared with neighbouring data -- confirm against Cacheline's
 * definition.
 */
struct Diag {
        Cacheline c0;
        Lock;
        long    cnt;                    /* bumped up and down by pass1; must end at 0 */
        long    sync;                   /* barrier counter used by synccpus */
        Cacheline c1;
};

extern ulong testmem;           /* its address is handed to memdiag in cpustart */

/*
 * number of cpus available.  contrast with conf.nmach, which is number
 * of running cpus.  zero until getncpus computes it.
 */
int navailcpus;
/* cpustart polls l1ptstable.word until non-zero before calling mmuinit */
Isolated l1ptstable;

/*
 * table of device register addresses for this system-on-chip.
 * entries wrapped in P2VAHB() go through that macro; all others are
 * literal addresses.  functions in this file cast several of these
 * (clkrst, power, scu, flow, exceptvec) to register-layout pointers.
 */
Soc soc = {
        .clkrst = 0x60006000,           /* clock & reset signals */
        .power  = 0x7000e400,
        .exceptvec = PHYSEVP,           /* undocumented magic */
        .sema   = 0x60001000,
        .l2cache= PHYSL2BAG,            /* pl310 bag on the side */
        .flow   = 0x60007000,

        /* 4 non-gic controllers */
//      .intr   = { 0x60004000, 0x60004100, 0x60004200, 0x60004300, },

        /* private memory region */
        .scu    = 0x50040000,
        /* we got this address from the `cortex-a series programmer's guide'. */
        .intr   = 0x50040100,           /* per-cpu interface */
        .glbtmr = 0x50040200,
        .loctmr = 0x50040600,
        .intrdist=0x50041000,

        .uart   = { 0x70006000, 0x70006040,
                    0x70006200, 0x70006300, 0x70006400, },

        .rtc    = 0x7000e000,
        .tmr    = { 0x60005000, 0x60005008, 0x60005050, 0x60005058, },
        .µs    = 0x60005010,

        .pci    = 0x80000000,
        .ether  = 0xa0024000,

        .nand   = 0x70008000,
        .nor    = 0x70009000,           /* also VIRTNOR */

        .ehci   = P2VAHB(0xc5000000),   /* 1st of 3 */
        .ide    = P2VAHB(0xc3000000),

        .gpio   = { 0x6000d000, 0x6000d080, 0x6000d100, 0x6000d180,
                            0x6000d200, 0x6000d280, 0x6000d300, },
        .spi    = { 0x7000d400, 0x7000d600, 0x7000d800, 0x7000da00, },
        .twsi   = 0x7000c000,
        .mmc    = { P2VAHB(0xc8000000), P2VAHB(0xc8000200),
                    P2VAHB(0xc8000400), P2VAHB(0xc8000600), },
};

static volatile Diag diag;      /* shared by all cpus in l1diag */
static int missed;              /* count of devices that missing() failed to probe */

/*
 * print the pllx and plle base/misc registers and the two super-clock
 * dividers on the console.  only reads the clock controller.
 */
void
dumpcpuclks(void)
{
        Clkrst *cr;

        cr = (Clkrst *)soc.clkrst;
        iprint("pllx base %#lux misc %#lux\n", cr->pllxbase, cr->pllxmisc);
        iprint("plle base %#lux misc %#lux\n", cr->pllebase, cr->pllemisc);
        iprint("super cclk divider %#lux\n", cr->supcclkdiv);
        iprint("super sclk divider %#lux\n", cr->supsclkdiv);
}

/* return a fixed cpu description; the (unnamed) argument is ignored */
static char *
devidstr(ulong)
{
        return "ARM Cortex-A9";
}

/* intentionally empty; presumably exists to satisfy a link-table reference -- TODO confirm */
void
archtegralink(void)
{
}

/*
 * format this cpu's name and revision, taken from the main id register
 * (cpidget), into buf of the given size; return buf.
 * asserts that the id register's top byte is 'A'.
 */
char *
cputype2name(char *buf, int size)
{
        ulong id, major, minor;

        id = cpidget();                 /* main id register */
        assert((id >> 24) == 'A');
        major = (id >> 20) & MASK(4);
        minor = id & MASK(4);
        seprint(buf, buf + size, "Cortex-A9 r%ldp%ld", major, minor);
        return buf;
}

/*
 * apply cortex-a9 errata workarounds by setting bits in a diagnostic
 * register, chosen by the cpu's major and minor revision numbers.
 */
static void
errata(void)
{
        int uptor2p2;
        ulong id, major, minor, dreg;

        id = cpidget();                 /* main id register */
        assert((id >> 24) == 'A');
        minor = id & MASK(4);           /* minor revision */
        major = (id >> 20) & MASK(4);   /* major revision */
        uptor2p2 = major == 2 && minor <= 2;

        /* this is an undocumented `diagnostic register' that linux knows */
        dreg = cprdsc(0, CpDTLB, 0, 1);
        if (major < 2 || uptor2p2)
                dreg |= 1<<4;                   /* 742230 */
        if (uptor2p2)
                dreg |= 1<<6 | 1<<12 | 1<<22;   /* 743622, 2×742231 */
        if (major < 3)
                dreg |= 1<<11;                  /* 751472 */
        cpwrsc(0, CpDTLB, 0, 1, dreg);
}

void
archconfinit(void)
{
        char *p;
        ulong hz;

        assert(m != nil);
        m->cpuhz = 1000 * Mhz;                  /* trimslice speed */
        p = getconf("*cpumhz");
        if (p) {
                hz = atoi(p) * Mhz;
                if (hz >= 100*Mhz && hz <= 3600UL*Mhz)
                        m->cpuhz = hz;
        }
        m->delayloop = m->cpuhz/2000;           /* initial estimate */
        errata();
}

/*
 * fill in *ether for ethernet controller number ctlrno.
 * only controller 0 exists: returns 1 after configuring it, -1 otherwise.
 */
int
archether(unsigned ctlrno, Ether *ether)
{
        if (ctlrno != 0)
                return -1;

        ether->type = "rtl8169";                /* pci-e ether */
        ether->ctlrno = ctlrno;
        ether->irq = Pcieirq;                   /* non-msi pci-e intr */
        ether->nopt = 0;
        ether->mbps = 1000;
        return 1;
}

void
dumpscustate(void)
{
        Scu *scu = (Scu *)soc.scu;

        print("cpu%d scu: accctl %#lux\n", m->machno, scu->accctl);
        print("cpu%d scu: smp cpu bit map %#lo for %ld cpus; ", m->machno,
                (scu->cfg >> 4) & MASK(4), (scu->cfg & MASK(2)) + 1);
        print("cpus' power %#lux\n", scu->cpupwrsts);
}

void
scuon(void)
{
        Scu *scu = (Scu *)soc.scu;

        if (scu->ctl & Scuenable)
                return;
        scu->inval = MASK(16);
        coherence();
        scu->ctl = Scuparity | Scuenable | Specfill;
        coherence();
}

int
getncpus(void)
{
        int n;
        char *p;
        Scu *scu;

        if (navailcpus == 0) {
                scu = (Scu *)soc.scu;
                navailcpus = (scu->cfg & MASK(2)) + 1;
                if (navailcpus > MAXMACH)
                        navailcpus = MAXMACH;

                p = getconf("*ncpu");
                if (p && *p) {
                        n = atoi(p);
                        if (n > 0 && n < navailcpus)
                                navailcpus = n;
                }
        }
        return navailcpus;
}

/* print this cpu's number, clock rate in MHz, type and endianness */
void
cpuidprint(void)
{
        char type[64];
        char *endian;

        cputype2name(type, sizeof type);
        delay(50);                              /* let uart catch up */
        endian = (getpsr() & PsrBigend)? "big": "little";
        iprint("cpu%d: %lldMHz ARM %s %s-endian\n",
                m->machno, m->cpuhz / Mhz, type, endian);
}

/*
 * take all devices out of reset, enable all clock outputs, then write
 * the watchdog reset bits (cpu, cop, sys, enable) to rstsrc.
 * each group of register writes is followed by coherence().
 */
static void
clockson(void)
{
        Clkrst *clk = (Clkrst *)soc.clkrst;

        /* enable all by clearing resets */
        clk->rstdevl = clk->rstdevh = clk->rstdevu = 0;
        coherence();
        clk->clkoutl = clk->clkouth = clk->clkoutu = ~0; /* enable all clocks */
        coherence();

        /* NOTE(review): Wdsel is left clear here; see its comment in the enum */
        clk->rstsrc = Wdcpurst | Wdcoprst | Wdsysrst | Wdena;
        coherence();
}

/*
 * stop the given cpu (never cpu0): mark it off and stopped, halt the
 * avp coprocessor and then the cpu via the flow controller, and finally
 * assert the cpu's reset bits in the clock controller.
 * we could be shutting down ourself (if cpu == m->machno), so take care.
 */
void
stopcpu(uint cpu)
{
        Flow *flow = (Flow *)soc.flow;
        Clkrst *clk = (Clkrst *)soc.clkrst;

        if (cpu == 0) {
                iprint("stopcpu: may not stop cpu0\n");
                return;
        }

        /* record the cpu as stopped before touching the hardware */
        machoff(cpu);
        lock(&active);
        active.stopped |= 1 << cpu;
        unlock(&active);
        l1cache->wb();

        /* shut down arm7 avp coproc so it can't cause mischief. */
        /* could try watchdog without stopping avp. */
        flow->haltcop = Stop;
        coherence();
        flow->cop = 0;                                  /* no Cpuenable */
        coherence();
        delay(10);

        /*
         * NOTE(review): this bounds check (and the one below) runs only
         * after the state changes above have been made.
         * cpu is known non-zero here, so the cpu1 registers are selected.
         */
        assert(cpu < Maxflowcpus);
        *(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = Stop;
        coherence();
        *(cpu == 0? &flow->cpu0: &flow->cpu1) = 0;      /* no Cpuenable */
        coherence();
        delay(10);

        /* cold reset */
        assert(cpu < Maxcpus);
        clk->cpuset = (Cpu0reset | Cpu0dbgreset | Cpu0dereset) << cpu;
        coherence();
        delay(1);

        l1cache->wb();
}

/*
 * barrier: add ourselves to *cntp, then spin until it has reached n.
 */
static void
synccpus(volatile long *cntp, int n)
{
        ainc(cntp);
        for (;;)
                if (*cntp >= n)
                        break;
        /* all cpus should now be here */
}

/*
 * one contention pass of the l1 diagnostic: every cpu increments and
 * decrements dp->cnt many times, all cpus meet at a barrier, and each
 * then verifies under the lock that the count came back to zero.
 * a second barrier and two decrements of dp->sync by each cpu undo the
 * two barrier increments.
 */
static void
pass1(int pass, volatile Diag *dp)
{
        int round;

        if(m->machno == 0)
                iprint(" %d", pass);

        /* 1000*1000 - 1 rounds of balanced increment/decrement */
        for (round = 1; round < 1000*1000; round++) {
                ainc(&dp->cnt);
                adec(&dp->cnt);
        }

        /* first barrier: all cpus have finished hammering cnt */
        synccpus(&dp->sync, navailcpus);

        ilock(dp);
        if(dp->cnt != 0)
                panic("cpu%d: diag: failed w count %ld", m->machno, dp->cnt);
        iunlock(dp);

        /* second barrier: all cpus have checked cnt */
        synccpus(&dp->sync, 2 * navailcpus);
        adec(&dp->sync);
        adec(&dp->sync);
}

/*
 * try to confirm coherence of l1 caches.
 * assume that all available cpus will be started.
 * does nothing unless Debug is non-zero.
 *
 * every cpu runs this concurrently.  each phase uses a pair of
 * synccpus barriers on dp->sync (to navailcpus, then to 2*navailcpus)
 * after which each cpu decrements dp->sync twice to undo its two
 * increments.  printing is serialised with ilock and mostly left to cpu0.
 */
void
l1diag(void)
{
        int pass;
        volatile Diag *dp;

        if (!Debug)
                return;

        l1cache->wb();

        /*
         * synchronise and print
         */
        dp = &diag;
        ilock(dp);
        if (m->machno == 0)
                iprint("l1: waiting for %d cpus... ", navailcpus);
        iunlock(dp);

        synccpus(&dp->sync, navailcpus);

        ilock(dp);
        if (m->machno == 0)
                iprint("cache coherency pass");
        iunlock(dp);

        synccpus(&dp->sync, 2 * navailcpus);
        adec(&dp->sync);
        adec(&dp->sync);

        /*
         * cpus contend
         */
        for (pass = 0; pass < 3; pass++)
                pass1(pass, dp);

        /*
         * synchronise and check sanity
         */
        synccpus(&dp->sync, navailcpus);

        /* between the barriers, sync must lie in [navailcpus, 2*navailcpus) */
        if(dp->sync < navailcpus || dp->sync >= 2 * navailcpus)
                panic("cpu%d: diag: failed w dp->sync %ld", m->machno,
                        dp->sync);
        if(dp->cnt != 0)
                panic("cpu%d: diag: failed w dp->cnt %ld", m->machno,
                        dp->cnt);

        ilock(dp);
        iprint(" cpu%d ok", m->machno);
        iunlock(dp);

        synccpus(&dp->sync, 2 * navailcpus);
        adec(&dp->sync);
        adec(&dp->sync);
        l1cache->wb();

        /*
         * all done, print
         */
        ilock(dp);
        if (m->machno == 0)
                iprint("\n");
        iunlock(dp);
}

/*
 * let the given cpu run: clear its stop bit in clkcpu, release its
 * reset bits via cpuclr, then zero its flow-controller cpu and halt
 * registers.  each step is followed by coherence().
 */
static void
unfreeze(uint cpu)
{
        Clkrst *clk = (Clkrst *)soc.clkrst;
        Flow *flow = (Flow *)soc.flow;

        assert(cpu < Maxcpus);

        clk->clkcpu &= ~(Cpu0stop << cpu);
        coherence();
        /* out of reset */
        clk->cpuclr = (Cpu0reset | Cpu0wdreset | Cpu0dbgreset | Cpu0dereset) <<
                cpu;
        coherence();

        /* NOTE(review): checked only after the clock controller writes above */
        assert(cpu < Maxflowcpus);
        *(cpu == 0? &flow->cpu0: &flow->cpu1) = 0;
        coherence();
        *(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = 0; /* normal operat'n */
        coherence();
}

/*
 * start the given cpu executing at _vrst; return 0 on success, -1 if
 * the cpu number is unusable or the cpu does not report in.
 *
 * this is all a bit magic.  the soc.exceptvec register is effectively
 * undocumented.  we had to look at linux and experiment, alas.  this is the
 * sort of thing that should be standardised as part of the cortex mpcore spec.
 * even intel document their equivalent procedure.
 *
 * the word at soc.exceptvec is set to the physical address of _vrst,
 * the cpu is unfrozen, and we then wait up to 2000 delay(1) ticks for
 * that word to change; cpustart stores the new cpu's machno there.
 * the word's old contents are restored before returning.
 */
int
startcpu(uint cpu)
{
        int tries, status;
        ulong savedvec, rstaddr;
        ulong *evp;

        if (getncpus() < 2)
                return -1;
        if (cpu == m->machno || cpu >= MAXMACH || cpu >= navailcpus)
                return -1;

        evp = (ulong *)soc.exceptvec;   /* magic */
        savedvec = *evp;
        l1cache->wb();                  /* start next cpu w same view of ram */
        rstaddr = PADDR(_vrst);
        *evp = rstaddr;                 /* will start cpu executing at _vrst */
        coherence();
        l1cache->wb();
        unfreeze(cpu);

        tries = 2000;
        while (tries > 0 && *evp == rstaddr) {
                delay(1);
                tries--;
        }

        status = 0;
        if (tries <= 0 || *evp != cpu) {
                iprint("cpu%d: didn't start!\n", cpu);
                stopcpu(cpu);           /* make sure it's stopped */
                status = -1;
        }
        *evp = savedvec;
        return status;
}

/*
 * panic if the low bit of getscr() is set (running non-secure);
 * report a non-zero debug enable register.
 */
static void
cksecure(void)
{
        ulong dbg;
        extern ulong getdebug(void);

        if ((getscr() & 1) != 0)
                panic("cpu%d: running non-secure", m->machno);
        dbg = getdebug();
        if (dbg != 0)
                iprint("cpu%d: debug enable reg %#lux\n", m->machno, dbg);
}

/*
 * set the smp and maintenance-broadcast bits in the auxiliary control
 * register; return the register's previous value.
 */
ulong
smpon(void)
{
        ulong prev;

        /* cortex-a9 model-specific configuration */
        prev = getauxctl();
        putauxctl(prev | CpACsmp | CpACmaintbcast);
        return prev;
}

void
cortexa9cachecfg(void)
{
        /* cortex-a9 model-specific configuration */
        putauxctl(getauxctl() | CpACparity | CpAClwr0line | CpACl2pref);
}

/*
 * called on a cpu other than 0 from cpureset in l.s,
 * from _vrst in lexception.s.
 * mmu and l1 (and system-wide l2) caches and coherency (smpon) are on,
 * but interrupts are disabled.
 * our mmu is using an exact copy of cpu0's l1 page table
 * as it was after userinit ran.
 *
 * initialises this cpu, reports in to cpu0 through soc.exceptvec,
 * waits for l1ptstable.word to become non-zero (at most about 5 seconds),
 * then enters the scheduler.  does not return.
 */
void
cpustart(void)
{
        int ms;
        ulong *evp;
        Power *pwr;

        up = nil;
        /* our bit already being set in active.machs means we were reset */
        if (active.machs & (1<<m->machno)) {
                serialputc('?');
                serialputc('r');
                panic("cpu%d: resetting after start", m->machno);
        }
        assert(m->machno != 0);

        errata();
        cortexa9cachecfg();
        memdiag(&testmem);

        machinit();                     /* bumps nmach, adds bit to machs */
        machoff(m->machno);             /* not ready to go yet */

        /* clock signals and scu are system-wide and already on */
        clockshutdown();                /* kill any watch-dog timer */

        trapinit();
        clockinit();                    /* sets loop delay */
        timersinit();
        cpuidprint();

        /*
         * notify cpu0 that we're up so it can proceed to l1diag.
         * startcpu is polling this word and expects our machno in it.
         */
        evp = (ulong *)soc.exceptvec;   /* magic */
        *evp = m->machno;
        coherence();

        l1diag();               /* contend with other cpus to verify sanity */

        /*
         * pwr->noiopwr == 0
         * pwr->detect == 0x1ff (default, all disabled)
         */
        pwr = (Power *)soc.power;
        assert(pwr->gatests == MASK(7)); /* everything has power */

        /*
         * 8169 has to initialise before we get past this, thus cpu0
         * has to schedule processes first.
         */
        if (Debug)
                iprint("cpu%d: waiting for 8169\n", m->machno);
        /* poll every 10ms, invalidating our cached copy of the flag each time */
        for (ms = 0; !l1ptstable.word && ms < 5000; ms += 10) {
                delay(10);
                cachedinvse(&l1ptstable.word, sizeof l1ptstable.word);
        }
        if (!l1ptstable.word)
                iprint("cpu%d: 8169 unreasonably slow; proceeding\n", m->machno);
        /* now safe to copy cpu0's l1 pt in mmuinit */

        mmuinit();                      /* update our l1 pt from cpu0's */
        fpon();
        machon(m->machno);              /* now ready to go and be scheduled */

        if (Debug)
                iprint("cpu%d: scheding\n", m->machno);
        schedinit();
        panic("cpu%d: schedinit returned", m->machno);
}

/*
 * interrupt handler installed by archreset for Cpu0irq and Cpu1irq.
 * mainly used to break out of wfi.
 */
void
sgintr(Ureg *ureg, void *)
{
        iprint("cpu%d: got sgi\n", m->machno);
        if (m->machno == 0)
                return;
        /* try to prod cpu1 into life when it gets stuck */
        clockprod(ureg);
}

/*
 * one-time architecture initialisation; later calls return immediately.
 * sets provisional clock values, turns on clocks, runs archconfinit,
 * enables floating point and installs sgintr on Cpu0irq and Cpu1irq.
 */
void
archreset(void)
{
        static int beenhere;

        if (beenhere)
                return;
        beenhere = 1;

        /* conservative temporary values until archconfinit runs */
        m->cpuhz = 1000 * Mhz;                  /* trimslice speed */
        m->delayloop = m->cpuhz/2000;           /* initial estimate */

        prcachecfg();

        clockson();
        /* all partitions were powered up by u-boot, so needn't do anything */
        archconfinit();
//      resetusb();
        fpon();

        if (irqtooearly)
                panic("archreset: too early for irqenable");
        irqenable(Cpu0irq, sgintr, nil, "cpu0");
        irqenable(Cpu1irq, sgintr, nil, "cpu1");
        /* ... */
}

/*
 * reset the system by setting Sysreset in the clock controller's
 * rstdevl register.  must be called on cpu0.  if the reset has not
 * happened after half a second, loop forever printing dots.
 */
void
archreboot(void)
{
        Clkrst *cr;

        cr = (Clkrst *)soc.clkrst;
        assert(m->machno == 0);
        iprint("archreboot: reset!\n");
        delay(20);

        cr->rstdevl = cr->rstdevl | Sysreset;
        coherence();
        delay(500);

        /* shouldn't get here */
        splhi();
        iprint("awaiting reset");
        while (1) {
                delay(1000);
                print(".");
        }
}

/* intentionally empty: no keyboard initialisation is done here */
void
kbdinit(void)
{
}

/*
 * probe addr and, if probeaddr reports failure, count it in `missed'
 * and add `name at addr' to a comma-separated list on the console.
 * a zero addr is reported separately and is not counted.
 */
static void
missing(ulong addr, char *name)
{
        static int firstmiss = 1;

        if (addr == 0) {
                iprint("address zero for %s\n", name);
                return;
        }
        if (probeaddr(addr) < 0) {
                missed++;
                if (!firstmiss)
                        iprint(",\n\t");
                else {
                        iprint("missing:");
                        firstmiss = 0;
                }
                iprint(" %s at %#lux", name, addr);
        }
}

/*
 * verify that all the necessary device registers are accessible:
 * probe dram and a fixed list of soc devices, and end the report line
 * if any were found missing.
 */
void
chkmissing(void)
{
        delay(10);

        missing(KZERO, "dram");
        missing(soc.intr, "intr ctlr");
        missing(soc.intrdist, "intr distrib");
        missing(soc.tmr[0], "tegra timer1");
        missing(soc.uart[0], "console uart");
        missing(soc.pci, "pcie");
        missing(soc.ether, "ether8169");
        missing(soc.µs, "µs counter");

        if (missed != 0)
                iprint("\n");
        delay(10);
}

/* flash write-protect hook; intentionally does nothing, arguments are ignored */
void
archflashwp(Flash*, int)
{
}

/*
 * for ../port/devflash.c:/^flashreset
 * retrieve flash type, virtual base and length and return 0;
 * return -1 on error (no flash).
 *
 * only bank 0 is recognised, and for it this currently panics: the
 * assignments after the panic are placeholders awaiting a rewrite.
 */
int
archflashreset(int bank, Flash *f)
{
        if(bank != 0)
                return -1;
        panic("archflashreset: rewrite for nor & nand flash on ts");

        /*
         * this is set up for the igepv2 board.
         */
        f->type = "onenand";
        f->addr = (void*)VIRTNOR;               /* mapped here by archreset */
        f->size = 0;                            /* done by probe */
        f->width = 1;
        f->interleave = 0;
        return 0;
}