Statistics
| Revision:

root / wattch / power.c @ 56

History | View | Annotate | Download (103 KB)

1
/* I included this copyright since we're using Cacti for some stuff */
2

    
3
/*------------------------------------------------------------
4
 *  Copyright 1994 Digital Equipment Corporation and Steve Wilton
5
 *                         All Rights Reserved
6
 *
7
 * Permission to use, copy, and modify this software and its documentation is
8
 * hereby granted only under the following terms and conditions.  Both the
9
 * above copyright notice and this permission notice must appear in all copies
10
 * of the software, derivative works or modified versions, and any portions
11
 * thereof, and both notices must appear in supporting documentation.
12
 *
13
 * Users of this software agree to the terms and conditions set forth herein,
14
 * and hereby grant back to Digital a non-exclusive, unrestricted, royalty-
15
 * free right and license under any changes, enhancements or extensions
16
 * made to the core functions of the software, including but not limited to
17
 * those affording compatibility with other hardware or software
18
 * environments, but excluding applications which incorporate this software.
19
 * Users further agree to use their best efforts to return to Digital any
20
 * such changes, enhancements or extensions that they make and inform Digital
21
 * of noteworthy uses of this software.  Correspondence should be provided
22
 * to Digital at:
23
 *
24
 *                       Director of Licensing
25
 *                       Western Research Laboratory
26
 *                       Digital Equipment Corporation
27
 *                       100 Hamilton Avenue
28
 *                       Palo Alto, California  94301
29
 *
30
 * This software may be distributed (but not offered for sale or transferred
31
 * for compensation) to third parties, provided such third parties agree to
32
 * abide by the terms and conditions of this notice.
33
 *
34
 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
35
 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
36
 * OF MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
37
 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
38
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
39
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
41
 * SOFTWARE.
42
 *------------------------------------------------------------*/
43

    
44
#include <math.h>
45
#include "power.h"
46
#include "machine.h"
47
#include "cache.h"
48
#include "sim.h"
49
#include <assert.h>
50

    
51
//#define SensePowerfactor (Mhz)*(Vdd/2)*(Vdd/2)
52
//#define Sense2Powerfactor (Mhz)*(2*.3+.1*Vdd)
53
//#define Powerfactor (Mhz)*Vdd*Vdd
54
//#define LowSwingPowerfactor (Mhz)*.2*.2
55
/* Scale factor applied for crossover (vdd->gnd) currents. */
double crossover_scaling = 1.2;

/* Non-ideal turnoff percentage: the fraction of a unit's power that is
 * still charged in a cycle where the unit is idle (cc3 clock-gating style). */
double turnoff_factor = 0.1;

/* Derived scale constant, expressed relative to LSCALE (defined elsewhere). */
#define MSCALE (LSCALE * .624 / .2250)
61

    
62
/*----------------------------------------------------------------------*/
63

    
64
/* static power model results */
65
power_result_type power;
66

    
67
/*
 * pow2 - return 2 raised to the power x as an int.
 *
 * Uses an integer shift instead of floating-point pow(), so no libm call
 * and no double->int conversion is involved.
 *
 *   x < 0            -> 0 (same as truncating the fractional result)
 *   result too large -> saturates at the largest positive int instead of
 *                       performing an out-of-range conversion (undefined)
 */
int pow2(int x) {
  /* highest exponent whose result still fits in a signed int
     (assumes 8-bit bytes, so no extra header is needed) */
  const int max_shift = (int)(sizeof(int) * 8) - 2;

  if (x < 0)
    return 0;
  if (x > max_shift)
    return (int)(~0u >> 1);   /* largest positive int */
  return 1 << x;
}
70

    
71
double logfour(x)
72
     double x;
73
{
74
  if (x<=0) fprintf(stderr,"%e\n",x);
75
  return( (double) (log(x)/log(4.0)) );
76
}
77

    
78
/* safer pop count to validate the fast algorithm */
79
int pop_count_slow(bquad_t bits)
80
{
81
  int count = 0; 
82
  bquad_t tmpbits = bits; 
83
  while (tmpbits) { 
84
    if (tmpbits & 1) ++count; 
85
    tmpbits >>= 1; 
86
  } 
87
  return count; 
88
}
89

    
90
/* fast pop count */
91
int pop_count(bquad_t bits)
92
{
93
#define T unsigned long long
94
#define ONES ((T)(-1)) 
95
#define TWO(k) ((T)1 << (k)) 
96
#define CYCL(k) (ONES/(1 + (TWO(TWO(k))))) 
97
#define BSUM(x,k) ((x)+=(x) >> TWO(k), (x) &= CYCL(k)) 
98
  bquad_t x = bits; 
99
  x = (x & CYCL(0)) + ((x>>TWO(0)) & CYCL(0)); 
100
  x = (x & CYCL(1)) + ((x>>TWO(1)) & CYCL(1)); 
101
  BSUM(x,2); 
102
  BSUM(x,3); 
103
  BSUM(x,4); 
104
  BSUM(x,5); 
105
  return x; 
106
}
107

    
108

    
109
/* Instruction-format widths used by the power model. */
int opcode_length = 8;
int inst_length = 32;

/* Pipeline and resource configuration, defined by the simulator core. */
extern int ruu_decode_width;
extern int ruu_issue_width;
extern int ruu_commit_width;
extern int RUU_size;
extern int LSQ_size;
extern int data_width;
extern int res_ialu;
extern int res_fpalu;
extern int res_memport;

/* Register-tag widths; defined here, assigned elsewhere. */
int nvreg_width;
int npreg_width;

/* Bimodal predictor configuration. */
extern int bimod_config[];

/* Cache hierarchy. */
extern struct cache_t *cache_dl1;
extern struct cache_t *cache_il1;
extern struct cache_t *cache_dl2;

/* TLBs. */
extern struct cache_t *dtlb;
extern struct cache_t *itlb;

/* 2-level predictor config (<l1size> <l2size> <hist_size> <xor>) */
extern int twolev_config[];

/* combining predictor config (<meta_table_size>) */
extern int comb_config[];

/* return address stack (RAS) size */
extern int ras_size;

/* BTB predictor config (<num_sets> <associativity>) */
extern int btb_config[];

/* Global clock capacitance; defined here, assigned elsewhere. */
double global_clockcap;
147

    
148
/* ---- Accumulated per-unit power, no clock gating ---------------------- */
static double rename_power = 0;
static double bpred_power = 0;
static double window_power = 0;
static double lsq_power = 0;
static double regfile_power = 0;
static double icache_power = 0;
static double dcache_power = 0;
static double dcache2_power = 0;
static double alu_power = 0;
static double falu_power = 0;
static double resultbus_power = 0;
static double clock_power = 0;

/* ---- Accumulated per-unit power, clock-gating style cc1 --------------- */
static double rename_power_cc1 = 0;
static double bpred_power_cc1 = 0;
static double window_power_cc1 = 0;
static double lsq_power_cc1 = 0;
static double regfile_power_cc1 = 0;
static double icache_power_cc1 = 0;
static double dcache_power_cc1 = 0;
static double dcache2_power_cc1 = 0;
static double alu_power_cc1 = 0;
static double resultbus_power_cc1 = 0;
static double clock_power_cc1 = 0;

/* ---- Accumulated per-unit power, clock-gating style cc2 --------------- */
static double rename_power_cc2 = 0;
static double bpred_power_cc2 = 0;
static double window_power_cc2 = 0;
static double lsq_power_cc2 = 0;
static double regfile_power_cc2 = 0;
static double icache_power_cc2 = 0;
static double dcache_power_cc2 = 0;
static double dcache2_power_cc2 = 0;
static double alu_power_cc2 = 0;
static double resultbus_power_cc2 = 0;
static double clock_power_cc2 = 0;

/* ---- Accumulated per-unit power, clock-gating style cc3 --------------- */
static double rename_power_cc3 = 0;
static double bpred_power_cc3 = 0;
static double window_power_cc3 = 0;
static double lsq_power_cc3 = 0;
static double regfile_power_cc3 = 0;
static double icache_power_cc3 = 0;
static double dcache_power_cc3 = 0;
static double dcache2_power_cc3 = 0;
static double alu_power_cc3 = 0;
static double resultbus_power_cc3 = 0;
static double clock_power_cc3 = 0;

/* ---- Running totals over all units, recomputed every cycle ------------ */
static double total_cycle_power;
static double total_cycle_power_cc1;
static double total_cycle_power_cc2;
static double total_cycle_power_cc3;

/* ---- Parasitic (regulator / resistive) loss accumulators -------------- */
static double total_parasitic_cc1 = 0.0;
static double total_parasitic_cc2 = 0.0;
static double total_parasitic_cc3 = 0.0;
static double offchip_parasitic_cc1 = 0.0;
static double offchip_parasitic_cc2 = 0.0;
static double offchip_parasitic_cc3 = 0.0;
static double onchip_parasitic_cc1 = 0.0;
static double onchip_parasitic_cc2 = 0.0;
static double onchip_parasitic_cc3 = 0.0;

/* Resistance used for the I^2 * R off-chip loss term. */
#define PARASITIC_OHM 0.002

/* Extremes of the per-cycle supply current seen so far. */
static double max_amp = 0.00;
static double min_amp = 1000.00;

/* Off-chip loss table: one entry per 0.5 step of current, two entries per
 * amp, plus one final entry for everything at or above 13. */
static double offchip_ploss[] = {
  0.5, 0.5,   /*  1 amp */
  0.5, 0.5,   /*  2 amp */
  0.5, 0.5,   /*  3 amp */
  0.6, 0.7,   /*  4 */
  0.8, 0.9,   /*  5 */
  1.0, 1.1,   /*  6 */
  1.2, 1.3,   /*  7 */
  1.5, 1.6,   /*  8 */
  1.8, 2.0,   /*  9 */
  2.2, 2.4,   /* 10 */
  2.6, 2.8,   /* 11 */
  3.0, 3.3,   /* 12 */
  3.6, 3.9,   /* 13 */
  4.0         /* 13 and above */
};

/* ---- Single-cycle power: previous totals and this cycle's delta ------- */
static double last_single_total_cycle_power_cc1 = 0.0;
static double last_single_total_cycle_power_cc2 = 0.0;
static double last_single_total_cycle_power_cc3 = 0.0;
static double current_total_cycle_power_cc1;
static double current_total_cycle_power_cc2;
static double current_total_cycle_power_cc3;

/* ---- Speed-switching (voltage/frequency) controller state ------------- */
static double last_sim_num_insn = 0;     /* committed count at previous cycle  */
static double last_sim_total_insn = 0;   /* dispatched count at previous cycle */
static double diff_dispatch = 0;         /* instructions dispatched this cycle */
static double diff_commit = 0;           /* instructions committed this cycle  */
static int speed_grade = 1;              /* 0 = slow, anything else = fast     */
static int last_speed_grade = 1;
static double diff_dispatch_sum = 0;     /* windowed sums of the two deltas    */
static double diff_commit_sum = 0;
static int init_count = 0;               /* cycles spent filling the window    */

/* Uncomment to enable the forced speed-up path in update_power_stats(). */
//#define DVFS_FIX

/* Length, in cycles, of the history window. */
#define SUM_OVER 50000 // longer time = more power consumed
static double hist_dispatch[SUM_OVER];
static double hist_commit[SUM_OVER];
static int hist_idx = 0;                 /* next slot to write in the history  */
static double slow_cycles = 0;
static double fast_cycles = 0;
static double last_switch_time = 0;
static double cycle_count = 0;

/* Length of the delay line between a speed decision and its use. */
#define SWITCH_CYCLES 30
static int speed_delay[SWITCH_CYCLES];

/* Per-cycle on-chip regulator loss for the slow and the fast speed grade. */
#define ONCHIP_VREG_LOSS_LOW 0.220
#define ONCHIP_VREG_LOSS_HIGH 0.120

/* ---- Largest single-cycle power seen so far --------------------------- */
static double max_cycle_power_cc1 = 0.0;
static double max_cycle_power_cc2 = 0.0;
static double max_cycle_power_cc3 = 0.0;
261

    
262
/* ---- Per-cycle access counters, owned by the simulator core ----------- */
extern counter_t rename_access;
extern counter_t bpred_access;
extern counter_t window_access;
extern counter_t lsq_access;
extern counter_t regfile_access;
extern counter_t icache_access;
extern counter_t dcache_access;
extern counter_t dcache2_access;
extern counter_t alu_access;
extern counter_t ialu_access;
extern counter_t falu_access;
extern counter_t resultbus_access;

/* Finer-grained window / LSQ activity counters. */
extern counter_t window_selection_access;
extern counter_t window_wakeup_access;
extern counter_t window_preg_access;
extern counter_t lsq_preg_access;
extern counter_t lsq_wakeup_access;
extern counter_t lsq_store_data_access;
extern counter_t lsq_load_data_access;

/* Population-count bookkeeping fed to compute_af(). */
extern counter_t window_total_pop_count_cycle;
extern counter_t window_num_pop_count_cycle;
extern counter_t lsq_total_pop_count_cycle;
extern counter_t lsq_num_pop_count_cycle;
extern counter_t regfile_total_pop_count_cycle;
extern counter_t regfile_num_pop_count_cycle;
extern counter_t resultbus_total_pop_count_cycle;
extern counter_t resultbus_num_pop_count_cycle;

/* ---- Access totals accumulated by update_power_stats() ---------------- */
static counter_t total_rename_access = 0;
static counter_t total_bpred_access = 0;
static counter_t total_window_access = 0;
static counter_t total_lsq_access = 0;
static counter_t total_regfile_access = 0;
static counter_t total_icache_access = 0;
static counter_t total_dcache_access = 0;
static counter_t total_dcache2_access = 0;
static counter_t total_alu_access = 0;
static counter_t total_resultbus_access = 0;

/* ---- Largest per-cycle access count seen for each unit ---------------- */
static counter_t max_rename_access;
static counter_t max_bpred_access;
static counter_t max_window_access;
static counter_t max_lsq_access;
static counter_t max_regfile_access;
static counter_t max_icache_access;
static counter_t max_dcache_access;
static counter_t max_dcache2_access;
static counter_t max_alu_access;
static counter_t max_resultbus_access;
313

    
314
void clear_access_stats()
315
{
316
  rename_access=0;
317
  bpred_access=0;
318
  window_access=0;
319
  lsq_access=0;
320
  regfile_access=0;
321
  icache_access=0;
322
  dcache_access=0;
323
  dcache2_access=0;
324
  alu_access=0;
325
  ialu_access=0;
326
  falu_access=0;
327
  resultbus_access=0;
328

    
329
  window_preg_access=0;
330
  window_selection_access=0;
331
  window_wakeup_access=0;
332
  lsq_store_data_access=0;
333
  lsq_load_data_access=0;
334
  lsq_wakeup_access=0;
335
  lsq_preg_access=0;
336

    
337
  window_total_pop_count_cycle=0;
338
  window_num_pop_count_cycle=0;
339
  lsq_total_pop_count_cycle=0;
340
  lsq_num_pop_count_cycle=0;
341
  regfile_total_pop_count_cycle=0;
342
  regfile_num_pop_count_cycle=0;
343
  resultbus_total_pop_count_cycle=0;
344
  resultbus_num_pop_count_cycle=0;
345
}
346

    
347
/* compute bitline activity factors which we use to scale bitline power 
348
   Here it is very important whether we assume 0's or 1's are
349
   responsible for dissipating power in pre-charged stuctures. (since
350
   most of the bits are 0's, we assume the design is power-efficient
351
   enough to allow 0's to _not_ discharge 
352
*/
353
double compute_af(counter_t num_pop_count_cycle,counter_t total_pop_count_cycle,int pop_width) {
354
  double avg_pop_count;
355
  double af,af_b;
356

    
357
  if(num_pop_count_cycle)
358
    avg_pop_count = (double)total_pop_count_cycle / (double)num_pop_count_cycle;
359
  else
360
    avg_pop_count = 0;
361

    
362
  af = avg_pop_count / (double)pop_width;
363
  
364
  af_b = 1.0 - af;
365

    
366
  /*  printf("af == %f%%, af_b == %f%%, total_pop == %d, num_pop == %d\n",100*af,100*af_b,total_pop_count_cycle,num_pop_count_cycle); */
367

    
368
  return(af_b);
369
}
370

    
371
/* compute power statistics on each cycle, for each conditional clocking style.  Obviously
372
most of the speed penalty comes here, so if you don't want per-cycle power estimates
373
you could post-process 
374

375
See README.wattch for details on the various clock gating styles.
376

377
*/
378
void update_power_stats()
379
{
380
  double window_af_b, lsq_af_b, regfile_af_b, resultbus_af_b;
381
  double current;
382
  int speed_idx;
383

    
384
#ifdef DYNAMIC_AF
385
  window_af_b = compute_af(window_num_pop_count_cycle,window_total_pop_count_cycle,data_width);
386
  lsq_af_b = compute_af(lsq_num_pop_count_cycle,lsq_total_pop_count_cycle,data_width);
387
  regfile_af_b = compute_af(regfile_num_pop_count_cycle,regfile_total_pop_count_cycle,data_width);
388
  resultbus_af_b = compute_af(resultbus_num_pop_count_cycle,resultbus_total_pop_count_cycle,data_width);
389
#endif
390
  
391
  rename_power+=power.rename_power;
392
  bpred_power+=power.bpred_power;
393
  window_power+=power.window_power;
394
  lsq_power+=power.lsq_power;
395
  regfile_power+=power.regfile_power;
396
  icache_power+=power.icache_power+power.itlb;
397
  dcache_power+=power.dcache_power+power.dtlb;
398
  dcache2_power+=power.dcache2_power;
399
  alu_power+=power.ialu_power + power.falu_power;
400
  falu_power+=power.falu_power;
401
  resultbus_power+=power.resultbus;
402
  clock_power+=power.clock_power;
403

    
404
  total_rename_access+=rename_access;
405
  total_bpred_access+=bpred_access;
406
  total_window_access+=window_access;
407
  total_lsq_access+=lsq_access;
408
  total_regfile_access+=regfile_access;
409
  total_icache_access+=icache_access;
410
  total_dcache_access+=dcache_access;
411
  total_dcache2_access+=dcache2_access;
412
  total_alu_access+=alu_access;
413
  total_resultbus_access+=resultbus_access;
414

    
415
  max_rename_access=MAX(rename_access,max_rename_access);
416
  max_bpred_access=MAX(bpred_access,max_bpred_access);
417
  max_window_access=MAX(window_access,max_window_access);
418
  max_lsq_access=MAX(lsq_access,max_lsq_access);
419
  max_regfile_access=MAX(regfile_access,max_regfile_access);
420
  max_icache_access=MAX(icache_access,max_icache_access);
421
  max_dcache_access=MAX(dcache_access,max_dcache_access);
422
  max_dcache2_access=MAX(dcache2_access,max_dcache2_access);
423
  max_alu_access=MAX(alu_access,max_alu_access);
424
  max_resultbus_access=MAX(resultbus_access,max_resultbus_access);
425
      
426
  if(rename_access) {
427
    rename_power_cc1+=power.rename_power;
428
    rename_power_cc2+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
429
    rename_power_cc3+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
430
  }
431
  else 
432
    rename_power_cc3+=turnoff_factor*power.rename_power;
433

    
434
  if(bpred_access) {
435
    if(bpred_access <= 2)
436
      bpred_power_cc1+=power.bpred_power;
437
    else
438
      bpred_power_cc1+=((double)bpred_access/2.0) * power.bpred_power;
439
    bpred_power_cc2+=((double)bpred_access/2.0) * power.bpred_power;
440
    bpred_power_cc3+=((double)bpred_access/2.0) * power.bpred_power;
441
  }
442
  else
443
    bpred_power_cc3+=turnoff_factor*power.bpred_power;
444

    
445
#ifdef STATIC_AF
446
  if(window_preg_access) {
447
    if(window_preg_access <= 3*ruu_issue_width)
448
      window_power_cc1+=power.rs_power;
449
    else
450
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
451
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
452
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
453
  }
454
  else
455
    window_power_cc3+=turnoff_factor*power.rs_power;
456
#elif defined(DYNAMIC_AF)
457
  if(window_preg_access) {
458
    if(window_preg_access <= 3*ruu_issue_width)
459
      window_power_cc1+=power.rs_power_nobit + window_af_b*power.rs_bitline;
460
    else
461
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
462
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
463
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
464
  }
465
  else
466
    window_power_cc3+=turnoff_factor*power.rs_power;
467
#else
468
  panic("no AF-style defined\n");
469
#endif
470

    
471
  if(window_selection_access) {
472
    if(window_selection_access <= ruu_issue_width)
473
      window_power_cc1+=power.selection;
474
    else
475
      window_power_cc1+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
476
    window_power_cc2+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
477
    window_power_cc3+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
478
  }
479
  else
480
    window_power_cc3+=turnoff_factor*power.selection;
481

    
482
  if(window_wakeup_access) {
483
    if(window_wakeup_access <= ruu_issue_width)
484
      window_power_cc1+=power.wakeup_power;
485
    else
486
      window_power_cc1+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
487
    window_power_cc2+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
488
    window_power_cc3+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
489
  }
490
  else
491
    window_power_cc3+=turnoff_factor*power.wakeup_power;
492

    
493
  if(lsq_wakeup_access) {
494
    if(lsq_wakeup_access <= res_memport)
495
      lsq_power_cc1+=power.lsq_wakeup_power;
496
    else
497
      lsq_power_cc1+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
498
    lsq_power_cc2+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
499
    lsq_power_cc3+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
500
  }
501
  else
502
    lsq_power_cc3+=turnoff_factor*power.lsq_wakeup_power;
503

    
504
#ifdef STATIC_AF
505
  if(lsq_preg_access) {
506
    if(lsq_preg_access <= res_memport)
507
      lsq_power_cc1+=power.lsq_rs_power;
508
    else
509
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
510
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
511
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
512
  }
513
  else
514
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
515
#else
516
  if(lsq_preg_access) {
517
    if(lsq_preg_access <= res_memport)
518
      lsq_power_cc1+=power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline;
519
    else
520
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
521
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
522
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
523
  }
524
  else
525
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
526
#endif
527

    
528
#ifdef STATIC_AF
529
  if(regfile_access) {
530
    if(regfile_access <= (3.0*ruu_commit_width))
531
      regfile_power_cc1+=power.regfile_power;
532
    else
533
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
534
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
535
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
536
  }
537
  else
538
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
539
#else
540
  if(regfile_access) {
541
    if(regfile_access <= (3.0*ruu_commit_width))
542
      regfile_power_cc1+=power.regfile_power_nobit + regfile_af_b*power.regfile_bitline;
543
    else
544
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
545
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
546
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
547
  }
548
  else
549
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
550
#endif
551

    
552
  if(icache_access) {
553
    /* don't scale icache because we assume 1 line is fetched, unless fetch stalls */
554
    icache_power_cc1+=power.icache_power+power.itlb;
555
    icache_power_cc2+=power.icache_power+power.itlb;
556
    icache_power_cc3+=power.icache_power+power.itlb;
557
  }
558
  else
559
    icache_power_cc3+=turnoff_factor*(power.icache_power+power.itlb);
560

    
561
  if(dcache_access) {
562
    if(dcache_access <= res_memport)
563
      dcache_power_cc1+=power.dcache_power+power.dtlb;
564
    else
565
      dcache_power_cc1+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
566
                                                     power.dtlb);
567
    dcache_power_cc2+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
568
                                                   power.dtlb);
569
    dcache_power_cc3+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
570
                                                   power.dtlb);
571
  }
572
  else
573
    dcache_power_cc3+=turnoff_factor*(power.dcache_power+power.dtlb);
574

    
575
  if(dcache2_access) {
576
    if(dcache2_access <= res_memport)
577
      dcache2_power_cc1+=power.dcache2_power;
578
    else
579
      dcache2_power_cc1+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
580
    dcache2_power_cc2+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
581
    dcache2_power_cc3+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
582
  }
583
  else
584
    dcache2_power_cc3+=turnoff_factor*power.dcache2_power;
585

    
586
  if(alu_access) {
587
    if(ialu_access)
588
      alu_power_cc1+=power.ialu_power;
589
    else
590
      alu_power_cc3+=turnoff_factor*power.ialu_power;
591
    if(falu_access)
592
      alu_power_cc1+=power.falu_power;
593
    else
594
      alu_power_cc3+=turnoff_factor*power.falu_power;
595

    
596
    alu_power_cc2+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
597
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
598
    alu_power_cc3+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
599
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
600
  }
601
  else
602
    alu_power_cc3+=turnoff_factor*(power.ialu_power + power.falu_power);
603

    
604
#ifdef STATIC_AF
605
  if(resultbus_access) {
606
    assert(ruu_issue_width != 0);
607
    if(resultbus_access <= ruu_issue_width) {
608
      resultbus_power_cc1+=power.resultbus;
609
    }
610
    else {
611
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
612
    }
613
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
614
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
615
  }
616
  else
617
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
618
#else
619
  if(resultbus_access) {
620
    assert(ruu_issue_width != 0);
621
    if(resultbus_access <= ruu_issue_width) {
622
      resultbus_power_cc1+=resultbus_af_b*power.resultbus;
623
    }
624
    else {
625
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
626
    }
627
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
628
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
629
  }
630
  else
631
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
632
#endif
633

    
634
  total_cycle_power = rename_power + bpred_power + window_power + 
635
    lsq_power + regfile_power + icache_power + dcache_power +
636
    alu_power + resultbus_power;
637

    
638
  total_cycle_power_cc1 = rename_power_cc1 + bpred_power_cc1 + 
639
    window_power_cc1 + lsq_power_cc1 + regfile_power_cc1 + 
640
    icache_power_cc1 + dcache_power_cc1 + alu_power_cc1 + 
641
    resultbus_power_cc1;
642

    
643
  total_cycle_power_cc2 = rename_power_cc2 + bpred_power_cc2 + 
644
    window_power_cc2 + lsq_power_cc2 + regfile_power_cc2 + 
645
    icache_power_cc2 + dcache_power_cc2 + alu_power_cc2 + 
646
    resultbus_power_cc2;
647

    
648
  total_cycle_power_cc3 = rename_power_cc3 + bpred_power_cc3 + 
649
    window_power_cc3 + lsq_power_cc3 + regfile_power_cc3 + 
650
    icache_power_cc3 + dcache_power_cc3 + alu_power_cc3 + 
651
    resultbus_power_cc3;
652

    
653
  clock_power_cc1+=power.clock_power*(total_cycle_power_cc1/total_cycle_power);
654
  clock_power_cc2+=power.clock_power*(total_cycle_power_cc2/total_cycle_power);
655
  clock_power_cc3+=power.clock_power*(total_cycle_power_cc3/total_cycle_power);
656

    
657
  total_cycle_power_cc1 += clock_power_cc1;
658
  total_cycle_power_cc2 += clock_power_cc2;
659
  total_cycle_power_cc3 += clock_power_cc3;
660

    
661
  current_total_cycle_power_cc1 = total_cycle_power_cc1
662
    -last_single_total_cycle_power_cc1;
663
  current_total_cycle_power_cc2 = total_cycle_power_cc2
664
    -last_single_total_cycle_power_cc2;
665
  current_total_cycle_power_cc3 = total_cycle_power_cc3
666
    -last_single_total_cycle_power_cc3;
667

    
668
   current = current_total_cycle_power_cc3 / Vdd;
669

    
670
  if (max_amp < current ) {
671
      max_amp = current ;
672
  }
673

    
674
  if (min_amp > current) {
675
      min_amp = current;
676
  }
677

    
678
  if (current < 0.5) {
679
      offchip_parasitic_cc1 += offchip_ploss[0];
680
      offchip_parasitic_cc2 += offchip_ploss[0];
681
      offchip_parasitic_cc3 += offchip_ploss[0];
682
  } else if (current < 1) {
683
      offchip_parasitic_cc1 += offchip_ploss[1];
684
      offchip_parasitic_cc2 += offchip_ploss[1];
685
      offchip_parasitic_cc3 += offchip_ploss[1];
686
  } else if (current < 1.5) {
687
      offchip_parasitic_cc1 += offchip_ploss[2];
688
      offchip_parasitic_cc2 += offchip_ploss[2];
689
      offchip_parasitic_cc3 += offchip_ploss[2];
690
  } else if (current < 2) {
691
      offchip_parasitic_cc1 += offchip_ploss[3];
692
      offchip_parasitic_cc2 += offchip_ploss[3];
693
      offchip_parasitic_cc3 += offchip_ploss[3];
694
  } else if (current < 2.5) {
695
      offchip_parasitic_cc1 += offchip_ploss[4];
696
      offchip_parasitic_cc2 += offchip_ploss[4];
697
      offchip_parasitic_cc3 += offchip_ploss[4];
698
  } else if (current < 3) {
699
      offchip_parasitic_cc1 += offchip_ploss[5];
700
      offchip_parasitic_cc2 += offchip_ploss[5];
701
      offchip_parasitic_cc3 += offchip_ploss[5];
702
  } else if (current < 3.5) {
703
      offchip_parasitic_cc1 += offchip_ploss[6];
704
      offchip_parasitic_cc2 += offchip_ploss[6];
705
      offchip_parasitic_cc3 += offchip_ploss[6];
706
  } else if (current < 4) {
707
      offchip_parasitic_cc1 += offchip_ploss[7];
708
      offchip_parasitic_cc2 += offchip_ploss[7];
709
      offchip_parasitic_cc3 += offchip_ploss[7];
710
  } else if (current < 4.5) {
711
      offchip_parasitic_cc1 += offchip_ploss[8];
712
      offchip_parasitic_cc2 += offchip_ploss[8];
713
      offchip_parasitic_cc3 += offchip_ploss[8];
714
  } else if (current < 5) {
715
      offchip_parasitic_cc1 += offchip_ploss[9];
716
      offchip_parasitic_cc2 += offchip_ploss[9];
717
      offchip_parasitic_cc3 += offchip_ploss[9];
718
  } else if (current < 5.5) {
719
      offchip_parasitic_cc1 += offchip_ploss[10];
720
      offchip_parasitic_cc2 += offchip_ploss[10];
721
      offchip_parasitic_cc3 += offchip_ploss[10];
722
  } else if (current < 6) {
723
      offchip_parasitic_cc1 += offchip_ploss[11];
724
      offchip_parasitic_cc2 += offchip_ploss[11];
725
      offchip_parasitic_cc3 += offchip_ploss[11];
726
  } else if (current < 6.5) {
727
      offchip_parasitic_cc1 += offchip_ploss[12];
728
      offchip_parasitic_cc2 += offchip_ploss[12];
729
      offchip_parasitic_cc3 += offchip_ploss[12];
730
  } else if (current < 7) {
731
      offchip_parasitic_cc1 += offchip_ploss[13];
732
      offchip_parasitic_cc2 += offchip_ploss[13];
733
      offchip_parasitic_cc3 += offchip_ploss[13];
734
  } else if (current < 7.5) {
735
      offchip_parasitic_cc1 += offchip_ploss[14];
736
      offchip_parasitic_cc2 += offchip_ploss[14];
737
      offchip_parasitic_cc3 += offchip_ploss[14];
738
  } else if (current < 8) {
739
      offchip_parasitic_cc1 += offchip_ploss[15];
740
      offchip_parasitic_cc2 += offchip_ploss[15];
741
      offchip_parasitic_cc3 += offchip_ploss[15];
742
  } else if (current < 8.5) {
743
      offchip_parasitic_cc1 += offchip_ploss[16];
744
      offchip_parasitic_cc2 += offchip_ploss[16];
745
      offchip_parasitic_cc3 += offchip_ploss[16];
746
  } else if (current < 9) {
747
      offchip_parasitic_cc1 += offchip_ploss[17];
748
      offchip_parasitic_cc2 += offchip_ploss[17];
749
      offchip_parasitic_cc3 += offchip_ploss[17];
750
  } else if (current < 9.5) {
751
      offchip_parasitic_cc1 += offchip_ploss[18];
752
      offchip_parasitic_cc2 += offchip_ploss[18];
753
      offchip_parasitic_cc3 += offchip_ploss[18];
754
  } else if (current < 10) {
755
      offchip_parasitic_cc1 += offchip_ploss[19];
756
      offchip_parasitic_cc2 += offchip_ploss[19];
757
      offchip_parasitic_cc3 += offchip_ploss[19];
758
  } else if (current < 10.5) {
759
      offchip_parasitic_cc1 += offchip_ploss[20];
760
      offchip_parasitic_cc2 += offchip_ploss[20];
761
      offchip_parasitic_cc3 += offchip_ploss[20];
762
  } else if (current < 11) {
763
      offchip_parasitic_cc1 += offchip_ploss[21];
764
      offchip_parasitic_cc2 += offchip_ploss[21];
765
      offchip_parasitic_cc3 += offchip_ploss[21];
766
  } else if (current < 11.5) {
767
      offchip_parasitic_cc1 += offchip_ploss[22];
768
      offchip_parasitic_cc2 += offchip_ploss[22];
769
      offchip_parasitic_cc3 += offchip_ploss[22];
770
  } else if (current < 12) {
771
      offchip_parasitic_cc1 += offchip_ploss[23];
772
      offchip_parasitic_cc2 += offchip_ploss[23];
773
      offchip_parasitic_cc3 += offchip_ploss[23];
774
  } else if (current < 12.5) {
775
      offchip_parasitic_cc1 += offchip_ploss[24];
776
      offchip_parasitic_cc2 += offchip_ploss[24];
777
      offchip_parasitic_cc3 += offchip_ploss[24];
778
  } else if (current < 13) {
779
      offchip_parasitic_cc1 += offchip_ploss[25];
780
      offchip_parasitic_cc2 += offchip_ploss[25];
781
      offchip_parasitic_cc3 += offchip_ploss[25];
782
  } else {
783
      offchip_parasitic_cc1 += offchip_ploss[26];
784
      offchip_parasitic_cc2 += offchip_ploss[26];
785
      offchip_parasitic_cc3 += offchip_ploss[26];
786
  }
787

    
788
  offchip_parasitic_cc1 += pow(current, 2) * PARASITIC_OHM;
789
  offchip_parasitic_cc2 += pow(current, 2) * PARASITIC_OHM;
790
  offchip_parasitic_cc3 += pow(current, 2) * PARASITIC_OHM;
791

    
792
  // Onchip regulator paraisitc loss
793
  if (speed_grade == 0) {
794
      onchip_parasitic_cc1 += ONCHIP_VREG_LOSS_LOW;
795
      onchip_parasitic_cc2 += ONCHIP_VREG_LOSS_LOW;
796
      onchip_parasitic_cc3 += ONCHIP_VREG_LOSS_LOW;
797
  } else {
798
      onchip_parasitic_cc1 += ONCHIP_VREG_LOSS_HIGH;
799
      onchip_parasitic_cc2 += ONCHIP_VREG_LOSS_HIGH;
800
      onchip_parasitic_cc3 += ONCHIP_VREG_LOSS_HIGH;
801
  }
802
    
803
  max_cycle_power_cc1 = MAX(max_cycle_power_cc1,current_total_cycle_power_cc1);
804
  max_cycle_power_cc2 = MAX(max_cycle_power_cc2,current_total_cycle_power_cc2);
805
  max_cycle_power_cc3 = MAX(max_cycle_power_cc3,current_total_cycle_power_cc3);
806

    
807
  last_single_total_cycle_power_cc1 = total_cycle_power_cc1;
808
  last_single_total_cycle_power_cc2 = total_cycle_power_cc2;
809
  last_single_total_cycle_power_cc3 = total_cycle_power_cc3;
810

    
811
  cycle_count++;
812

    
813
  // here's where we change VFI levels
814
  diff_dispatch = sim_total_insn - last_sim_total_insn;
815
  diff_commit = sim_num_insn - last_sim_num_insn;
816
  
817
  diff_dispatch_sum += diff_dispatch;
818
  diff_commit_sum += diff_commit;
819

    
820
  hist_dispatch[hist_idx] = diff_dispatch;
821
  hist_commit[hist_idx] = diff_commit;
822
  hist_idx++;
823
  if(hist_idx >= SUM_OVER) {
824
    hist_idx = 0;
825
  }
826

    
827
  if(init_count >= SUM_OVER) {
828
      // Update speed
829
    speed_grade = speed_delay[SWITCH_CYCLES - 1];
830
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES-1; speed_idx++) {
831

    
832
        speed_delay[speed_idx+1] = speed_delay[speed_idx];
833
    }
834

    
835
    diff_dispatch_sum -= hist_dispatch[hist_idx];
836
    diff_commit_sum -= hist_commit[hist_idx];
837

    
838
    if( diff_commit_sum < diff_dispatch_sum ) {
839
        speed_delay[0] = 0;
840
    }
841
    else if( diff_commit_sum >= diff_dispatch_sum ) {
842
        speed_delay[0] = 1;
843
    }
844

    
845
    if(speed_grade == 0) {
846
        slow_cycles++;
847
    }
848
    else {
849
        fast_cycles++;
850
    }
851

    
852
  } else {
853
    init_count++;
854
    fast_cycles++;
855

    
856
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES; speed_idx++) {
857
        speed_delay[speed_idx] = 1;
858
    }
859
  }
860

    
861
//  if (diff_commit <= diff_dispatch) {
862
//      speed_grade = 0;
863
//  } else if (diff_commit > diff_dispatch) {
864
//      speed_grade = 1;
865
//  }
866

    
867
  if ((speed_grade == 0) && (last_speed_grade == 1)) {
868
      Mhz = Mhz / 2;
869
      Vdd = Vdd / 2;
870
      printf("Speed down!\n");
871
      last_switch_time = cycle_count;
872
  } else if ((speed_grade == 1) && (last_speed_grade == 0)) {
873
      Mhz = Mhz * 2;
874
      Vdd = Vdd * 2;
875
      printf("Speed up!\n");
876
      last_switch_time = cycle_count;
877
  }
878
#ifdef DVFS_FIX
879
  else if (last_switch_time < cycle_count-(SUM_OVER/3) && speed_grade==0 ) {
880
      speed_grade = 1;
881
      Mhz = Mhz * 2;
882
      Vdd = Vdd * 2;
883
      init_count = 0;
884
      last_switch_time = cycle_count;
885
      hist_idx = 0;
886
      diff_commit_sum = 0;
887
      diff_dispatch_sum = 0;
888
  }
889
#endif
890
      //printf("Vdd = %f, MHz = %f\n",Vdd,Mhz);
891

    
892
  if (speed_grade != last_speed_grade) {
893
    Period = 1/Mhz;
894
    SensePowerfactor3 = Mhz * Vbitsense * Vbitsense;
895
    SensePowerfactor2 = Mhz * (Vbitpre - Vbitsense) * (Vbitpre - Vbitsense);
896
    SensePowerfactor = (Mhz) * (Vdd/2) * (Vdd/2);
897
    Powerfactor = (Mhz) * (Vdd) * (Vdd);
898
    Sense2Powerfactor = Mhz * (2 * .3 + .1 * Vdd);
899
    LowSwingPowerfactor = Mhz * .2 * .2;
900
      calculate_power(&power);
901
  }
902

    
903
  last_speed_grade = speed_grade;
904

    
905
  // Update
906
  last_sim_num_insn  = sim_num_insn;
907
  last_sim_total_insn = sim_total_insn;
908

    
909
}
910

    
911
void
912
power_reg_stats(struct stat_sdb_t *sdb)        /* stats database */
913
{
914
  stat_reg_double(sdb, "rename_power", "total power usage of rename unit", &rename_power, 0, NULL);
915

    
916
  stat_reg_double(sdb, "bpred_power", "total power usage of bpred unit", &bpred_power, 0, NULL);
917

    
918
  stat_reg_double(sdb, "window_power", "total power usage of instruction window", &window_power, 0, NULL);
919

    
920
  stat_reg_double(sdb, "lsq_power", "total power usage of load/store queue", &lsq_power, 0, NULL);
921

    
922
  stat_reg_double(sdb, "regfile_power", "total power usage of arch. regfile", &regfile_power, 0, NULL);
923

    
924
  stat_reg_double(sdb, "icache_power", "total power usage of icache", &icache_power, 0, NULL);
925

    
926
  stat_reg_double(sdb, "dcache_power", "total power usage of dcache", &dcache_power, 0, NULL);
927

    
928
  stat_reg_double(sdb, "dcache2_power", "total power usage of dcache2", &dcache2_power, 0, NULL);
929

    
930
  stat_reg_double(sdb, "alu_power", "total power usage of alu", &alu_power, 0, NULL);
931

    
932
  stat_reg_double(sdb, "falu_power", "total power usage of falu", &falu_power, 0, NULL);
933

    
934
  stat_reg_double(sdb, "resultbus_power", "total power usage of resultbus", &resultbus_power, 0, NULL);
935

    
936
  stat_reg_double(sdb, "clock_power", "total power usage of clock", &clock_power, 0, NULL);
937

    
938
  stat_reg_formula(sdb, "avg_rename_power", "avg power usage of rename unit", "rename_power/sim_cycle", NULL);
939

    
940
  stat_reg_formula(sdb, "avg_bpred_power", "avg power usage of bpred unit", "bpred_power/sim_cycle", NULL);
941

    
942
  stat_reg_formula(sdb, "avg_window_power", "avg power usage of instruction window", "window_power/sim_cycle",  NULL);
943

    
944
  stat_reg_formula(sdb, "avg_lsq_power", "avg power usage of lsq", "lsq_power/sim_cycle",  NULL);
945

    
946
  stat_reg_formula(sdb, "avg_regfile_power", "avg power usage of arch. regfile", "regfile_power/sim_cycle",  NULL);
947

    
948
  stat_reg_formula(sdb, "avg_icache_power", "avg power usage of icache", "icache_power/sim_cycle",  NULL);
949

    
950
  stat_reg_formula(sdb, "avg_dcache_power", "avg power usage of dcache", "dcache_power/sim_cycle",  NULL);
951

    
952
  stat_reg_formula(sdb, "avg_dcache2_power", "avg power usage of dcache2", "dcache2_power/sim_cycle",  NULL);
953

    
954
  stat_reg_formula(sdb, "avg_alu_power", "avg power usage of alu", "alu_power/sim_cycle",  NULL);
955

    
956
  stat_reg_formula(sdb, "avg_falu_power", "avg power usage of falu", "falu_power/sim_cycle",  NULL);
957

    
958
  stat_reg_formula(sdb, "avg_resultbus_power", "avg power usage of resultbus", "resultbus_power/sim_cycle",  NULL);
959

    
960
  stat_reg_formula(sdb, "avg_clock_power", "avg power usage of clock", "clock_power/sim_cycle",  NULL);
961

    
962
  stat_reg_formula(sdb, "fetch_stage_power", "total power usage of fetch stage", "icache_power + bpred_power", NULL);
963

    
964
  stat_reg_formula(sdb, "dispatch_stage_power", "total power usage of dispatch stage", "rename_power", NULL);
965

    
966
  stat_reg_formula(sdb, "issue_stage_power", "total power usage of issue stage", "resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power", NULL);
967

    
968
  stat_reg_formula(sdb, "avg_fetch_power", "average power of fetch unit per cycle", "(icache_power + bpred_power)/ sim_cycle", /* format */NULL);
969

    
970
  stat_reg_formula(sdb, "avg_dispatch_power", "average power of dispatch unit per cycle", "(rename_power)/ sim_cycle", /* format */NULL);
971

    
972
  stat_reg_formula(sdb, "avg_issue_power", "average power of issue unit per cycle", "(resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power)/ sim_cycle", /* format */NULL);
973

    
974
  stat_reg_formula(sdb, "total_power", "total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power  + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)", NULL);
975

    
976
  stat_reg_formula(sdb, "avg_total_power_cycle", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_cycle", NULL);
977

    
978
  stat_reg_formula(sdb, "avg_total_power_cycle_nofp_nod2", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_cycle", NULL);
979

    
980
  stat_reg_formula(sdb, "avg_total_power_insn", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_total_insn", NULL);
981

    
982
  stat_reg_formula(sdb, "avg_total_power_insn_nofp_nod2", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_total_insn", NULL);
983

    
984
  stat_reg_double(sdb, "rename_power_cc1", "total power usage of rename unit_cc1", &rename_power_cc1, 0, NULL);
985

    
986
  stat_reg_double(sdb, "bpred_power_cc1", "total power usage of bpred unit_cc1", &bpred_power_cc1, 0, NULL);
987

    
988
  stat_reg_double(sdb, "window_power_cc1", "total power usage of instruction window_cc1", &window_power_cc1, 0, NULL);
989

    
990
  stat_reg_double(sdb, "lsq_power_cc1", "total power usage of lsq_cc1", &lsq_power_cc1, 0, NULL);
991

    
992
  stat_reg_double(sdb, "regfile_power_cc1", "total power usage of arch. regfile_cc1", &regfile_power_cc1, 0, NULL);
993

    
994
  stat_reg_double(sdb, "icache_power_cc1", "total power usage of icache_cc1", &icache_power_cc1, 0, NULL);
995

    
996
  stat_reg_double(sdb, "dcache_power_cc1", "total power usage of dcache_cc1", &dcache_power_cc1, 0, NULL);
997

    
998
  stat_reg_double(sdb, "dcache2_power_cc1", "total power usage of dcache2_cc1", &dcache2_power_cc1, 0, NULL);
999

    
1000
  stat_reg_double(sdb, "alu_power_cc1", "total power usage of alu_cc1", &alu_power_cc1, 0, NULL);
1001

    
1002
  stat_reg_double(sdb, "resultbus_power_cc1", "total power usage of resultbus_cc1", &resultbus_power_cc1, 0, NULL);
1003

    
1004
  stat_reg_double(sdb, "clock_power_cc1", "total power usage of clock_cc1", &clock_power_cc1, 0, NULL);
1005

    
1006
  stat_reg_formula(sdb, "avg_rename_power_cc1", "avg power usage of rename unit_cc1", "rename_power_cc1/sim_cycle", NULL);
1007

    
1008
  stat_reg_formula(sdb, "avg_bpred_power_cc1", "avg power usage of bpred unit_cc1", "bpred_power_cc1/sim_cycle", NULL);
1009

    
1010
  stat_reg_formula(sdb, "avg_window_power_cc1", "avg power usage of instruction window_cc1", "window_power_cc1/sim_cycle",  NULL);
1011

    
1012
  stat_reg_formula(sdb, "avg_lsq_power_cc1", "avg power usage of lsq_cc1", "lsq_power_cc1/sim_cycle",  NULL);
1013

    
1014
  stat_reg_formula(sdb, "avg_regfile_power_cc1", "avg power usage of arch. regfile_cc1", "regfile_power_cc1/sim_cycle",  NULL);
1015

    
1016
  stat_reg_formula(sdb, "avg_icache_power_cc1", "avg power usage of icache_cc1", "icache_power_cc1/sim_cycle",  NULL);
1017

    
1018
  stat_reg_formula(sdb, "avg_dcache_power_cc1", "avg power usage of dcache_cc1", "dcache_power_cc1/sim_cycle",  NULL);
1019

    
1020
  stat_reg_formula(sdb, "avg_dcache2_power_cc1", "avg power usage of dcache2_cc1", "dcache2_power_cc1/sim_cycle",  NULL);
1021

    
1022
  stat_reg_formula(sdb, "avg_alu_power_cc1", "avg power usage of alu_cc1", "alu_power_cc1/sim_cycle",  NULL);
1023

    
1024
  stat_reg_formula(sdb, "avg_resultbus_power_cc1", "avg power usage of resultbus_cc1", "resultbus_power_cc1/sim_cycle",  NULL);
1025

    
1026
  stat_reg_formula(sdb, "avg_clock_power_cc1", "avg power usage of clock_cc1", "clock_power_cc1/sim_cycle",  NULL);
1027

    
1028
  stat_reg_formula(sdb, "fetch_stage_power_cc1", "total power usage of fetch stage_cc1", "icache_power_cc1 + bpred_power_cc1", NULL);
1029

    
1030
  stat_reg_formula(sdb, "dispatch_stage_power_cc1", "total power usage of dispatch stage_cc1", "rename_power_cc1", NULL);
1031

    
1032
  stat_reg_formula(sdb, "issue_stage_power_cc1", "total power usage of issue stage_cc1", "resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1", NULL);
1033

    
1034
  stat_reg_formula(sdb, "avg_fetch_power_cc1", "average power of fetch unit per cycle_cc1", "(icache_power_cc1 + bpred_power_cc1)/ sim_cycle", /* format */NULL);
1035

    
1036
  stat_reg_formula(sdb, "avg_dispatch_power_cc1", "average power of dispatch unit per cycle_cc1", "(rename_power_cc1)/ sim_cycle", /* format */NULL);
1037

    
1038
  stat_reg_formula(sdb, "avg_issue_power_cc1", "average power of issue unit per cycle_cc1", "(resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1)/ sim_cycle", /* format */NULL);
1039

    
1040
  stat_reg_formula(sdb, "total_power_cycle_cc1", "total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)", NULL);
1041

    
1042
  stat_reg_formula(sdb, "avg_total_power_cycle_cc1", "average total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 +dcache2_power_cc1)/sim_cycle", NULL);
1043

    
1044
  stat_reg_formula(sdb, "avg_total_power_insn_cc1", "average total power per insn_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 +  alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)/sim_total_insn", NULL);
1045

    
1046
  stat_reg_double(sdb, "rename_power_cc2", "total power usage of rename unit_cc2", &rename_power_cc2, 0, NULL);
1047

    
1048
  stat_reg_double(sdb, "bpred_power_cc2", "total power usage of bpred unit_cc2", &bpred_power_cc2, 0, NULL);
1049

    
1050
  stat_reg_double(sdb, "window_power_cc2", "total power usage of instruction window_cc2", &window_power_cc2, 0, NULL);
1051

    
1052
  stat_reg_double(sdb, "lsq_power_cc2", "total power usage of lsq_cc2", &lsq_power_cc2, 0, NULL);
1053

    
1054
  stat_reg_double(sdb, "regfile_power_cc2", "total power usage of arch. regfile_cc2", &regfile_power_cc2, 0, NULL);
1055

    
1056
  stat_reg_double(sdb, "icache_power_cc2", "total power usage of icache_cc2", &icache_power_cc2, 0, NULL);
1057

    
1058
  stat_reg_double(sdb, "dcache_power_cc2", "total power usage of dcache_cc2", &dcache_power_cc2, 0, NULL);
1059

    
1060
  stat_reg_double(sdb, "dcache2_power_cc2", "total power usage of dcache2_cc2", &dcache2_power_cc2, 0, NULL);
1061

    
1062
  stat_reg_double(sdb, "alu_power_cc2", "total power usage of alu_cc2", &alu_power_cc2, 0, NULL);
1063

    
1064
  stat_reg_double(sdb, "resultbus_power_cc2", "total power usage of resultbus_cc2", &resultbus_power_cc2, 0, NULL);
1065

    
1066
  stat_reg_double(sdb, "clock_power_cc2", "total power usage of clock_cc2", &clock_power_cc2, 0, NULL);
1067

    
1068
  stat_reg_formula(sdb, "avg_rename_power_cc2", "avg power usage of rename unit_cc2", "rename_power_cc2/sim_cycle", NULL);
1069

    
1070
  stat_reg_formula(sdb, "avg_bpred_power_cc2", "avg power usage of bpred unit_cc2", "bpred_power_cc2/sim_cycle", NULL);
1071

    
1072
  stat_reg_formula(sdb, "avg_window_power_cc2", "avg power usage of instruction window_cc2", "window_power_cc2/sim_cycle",  NULL);
1073

    
1074
  stat_reg_formula(sdb, "avg_lsq_power_cc2", "avg power usage of instruction lsq_cc2", "lsq_power_cc2/sim_cycle",  NULL);
1075

    
1076
  stat_reg_formula(sdb, "avg_regfile_power_cc2", "avg power usage of arch. regfile_cc2", "regfile_power_cc2/sim_cycle",  NULL);
1077

    
1078
  stat_reg_formula(sdb, "avg_icache_power_cc2", "avg power usage of icache_cc2", "icache_power_cc2/sim_cycle",  NULL);
1079

    
1080
  stat_reg_formula(sdb, "avg_dcache_power_cc2", "avg power usage of dcache_cc2", "dcache_power_cc2/sim_cycle",  NULL);
1081

    
1082
  stat_reg_formula(sdb, "avg_dcache2_power_cc2", "avg power usage of dcache2_cc2", "dcache2_power_cc2/sim_cycle",  NULL);
1083

    
1084
  stat_reg_formula(sdb, "avg_alu_power_cc2", "avg power usage of alu_cc2", "alu_power_cc2/sim_cycle",  NULL);
1085

    
1086
  stat_reg_formula(sdb, "avg_resultbus_power_cc2", "avg power usage of resultbus_cc2", "resultbus_power_cc2/sim_cycle",  NULL);
1087

    
1088
  stat_reg_formula(sdb, "avg_clock_power_cc2", "avg power usage of clock_cc2", "clock_power_cc2/sim_cycle",  NULL);
1089

    
1090
  stat_reg_formula(sdb, "fetch_stage_power_cc2", "total power usage of fetch stage_cc2", "icache_power_cc2 + bpred_power_cc2", NULL);
1091

    
1092
  stat_reg_formula(sdb, "dispatch_stage_power_cc2", "total power usage of dispatch stage_cc2", "rename_power_cc2", NULL);
1093

    
1094
  stat_reg_formula(sdb, "issue_stage_power_cc2", "total power usage of issue stage_cc2", "resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2", NULL);
1095

    
1096
  stat_reg_formula(sdb, "avg_fetch_power_cc2", "average power of fetch unit per cycle_cc2", "(icache_power_cc2 + bpred_power_cc2)/ sim_cycle", /* format */NULL);
1097

    
1098
  stat_reg_formula(sdb, "avg_dispatch_power_cc2", "average power of dispatch unit per cycle_cc2", "(rename_power_cc2)/ sim_cycle", /* format */NULL);
1099

    
1100
  stat_reg_formula(sdb, "avg_issue_power_cc2", "average power of issue unit per cycle_cc2", "(resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2)/ sim_cycle", /* format */NULL);
1101

    
1102
  stat_reg_formula(sdb, "total_power_cycle_cc2", "total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)", NULL);
1103

    
1104
  stat_reg_formula(sdb, "avg_total_power_cycle_cc2", "average total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_cycle", NULL);
1105

    
1106
  stat_reg_formula(sdb, "avg_total_power_insn_cc2", "average total power per insn_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_total_insn", NULL);
1107

    
1108
  stat_reg_double(sdb, "rename_power_cc3", "total power usage of rename unit_cc3", &rename_power_cc3, 0, NULL);
1109

    
1110
  stat_reg_double(sdb, "bpred_power_cc3", "total power usage of bpred unit_cc3", &bpred_power_cc3, 0, NULL);
1111

    
1112
  stat_reg_double(sdb, "window_power_cc3", "total power usage of instruction window_cc3", &window_power_cc3, 0, NULL);
1113

    
1114
  stat_reg_double(sdb, "lsq_power_cc3", "total power usage of lsq_cc3", &lsq_power_cc3, 0, NULL);
1115

    
1116
  stat_reg_double(sdb, "regfile_power_cc3", "total power usage of arch. regfile_cc3", &regfile_power_cc3, 0, NULL);
1117

    
1118
  stat_reg_double(sdb, "icache_power_cc3", "total power usage of icache_cc3", &icache_power_cc3, 0, NULL);
1119

    
1120
  stat_reg_double(sdb, "dcache_power_cc3", "total power usage of dcache_cc3", &dcache_power_cc3, 0, NULL);
1121

    
1122
  stat_reg_double(sdb, "dcache2_power_cc3", "total power usage of dcache2_cc3", &dcache2_power_cc3, 0, NULL);
1123

    
1124
  stat_reg_double(sdb, "alu_power_cc3", "total power usage of alu_cc3", &alu_power_cc3, 0, NULL);
1125

    
1126
  stat_reg_double(sdb, "resultbus_power_cc3", "total power usage of resultbus_cc3", &resultbus_power_cc3, 0, NULL);
1127

    
1128
  stat_reg_double(sdb, "clock_power_cc3", "total power usage of clock_cc3", &clock_power_cc3, 0, NULL);
1129

    
1130
  stat_reg_formula(sdb, "avg_rename_power_cc3", "avg power usage of rename unit_cc3", "rename_power_cc3/sim_cycle", NULL);
1131

    
1132
  stat_reg_formula(sdb, "avg_bpred_power_cc3", "avg power usage of bpred unit_cc3", "bpred_power_cc3/sim_cycle", NULL);
1133

    
1134
  stat_reg_formula(sdb, "avg_window_power_cc3", "avg power usage of instruction window_cc3", "window_power_cc3/sim_cycle",  NULL);
1135

    
1136
  stat_reg_formula(sdb, "avg_lsq_power_cc3", "avg power usage of instruction lsq_cc3", "lsq_power_cc3/sim_cycle",  NULL);
1137

    
1138
  stat_reg_formula(sdb, "avg_regfile_power_cc3", "avg power usage of arch. regfile_cc3", "regfile_power_cc3/sim_cycle",  NULL);
1139

    
1140
  stat_reg_formula(sdb, "avg_icache_power_cc3", "avg power usage of icache_cc3", "icache_power_cc3/sim_cycle",  NULL);
1141

    
1142
  stat_reg_formula(sdb, "avg_dcache_power_cc3", "avg power usage of dcache_cc3", "dcache_power_cc3/sim_cycle",  NULL);
1143

    
1144
  stat_reg_formula(sdb, "avg_dcache2_power_cc3", "avg power usage of dcache2_cc3", "dcache2_power_cc3/sim_cycle",  NULL);
1145

    
1146
  stat_reg_formula(sdb, "avg_alu_power_cc3", "avg power usage of alu_cc3", "alu_power_cc3/sim_cycle",  NULL);
1147

    
1148
  stat_reg_formula(sdb, "avg_resultbus_power_cc3", "avg power usage of resultbus_cc3", "resultbus_power_cc3/sim_cycle",  NULL);
1149

    
1150
  stat_reg_formula(sdb, "avg_clock_power_cc3", "avg power usage of clock_cc3", "clock_power_cc3/sim_cycle",  NULL);
1151

    
1152
  stat_reg_formula(sdb, "fetch_stage_power_cc3", "total power usage of fetch stage_cc3", "icache_power_cc3 + bpred_power_cc3", NULL);
1153

    
1154
  stat_reg_formula(sdb, "dispatch_stage_power_cc3", "total power usage of dispatch stage_cc3", "rename_power_cc3", NULL);
1155

    
1156
  stat_reg_formula(sdb, "issue_stage_power_cc3", "total power usage of issue stage_cc3", "resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3", NULL);
1157

    
1158
  stat_reg_formula(sdb, "avg_fetch_power_cc3", "average power of fetch unit per cycle_cc3", "(icache_power_cc3 + bpred_power_cc3)/ sim_cycle", /* format */NULL);
1159

    
1160
  stat_reg_formula(sdb, "avg_dispatch_power_cc3", "average power of dispatch unit per cycle_cc3", "(rename_power_cc3)/ sim_cycle", /* format */NULL);
1161

    
1162
  stat_reg_formula(sdb, "avg_issue_power_cc3", "average power of issue unit per cycle_cc3", "(resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3)/ sim_cycle", /* format */NULL);
1163

    
1164
  stat_reg_formula(sdb, "total_power_cycle_cc3", "total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)", NULL);
1165

    
1166
  stat_reg_formula(sdb, "avg_total_power_cycle_cc3", "average total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_cycle", NULL);
1167

    
1168
  stat_reg_formula(sdb, "avg_total_power_insn_cc3", "average total power per insn_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_total_insn", NULL);
1169

    
1170
  stat_reg_counter(sdb, "total_rename_access", "total number accesses of rename unit", &total_rename_access, 0, NULL);
1171

    
1172
  stat_reg_counter(sdb, "total_bpred_access", "total number accesses of bpred unit", &total_bpred_access, 0, NULL);
1173

    
1174
  stat_reg_counter(sdb, "total_window_access", "total number accesses of instruction window", &total_window_access, 0, NULL);
1175

    
1176
  stat_reg_counter(sdb, "total_lsq_access", "total number accesses of load/store queue", &total_lsq_access, 0, NULL);
1177

    
1178
  stat_reg_counter(sdb, "total_regfile_access", "total number accesses of arch. regfile", &total_regfile_access, 0, NULL);
1179

    
1180
  stat_reg_counter(sdb, "total_icache_access", "total number accesses of icache", &total_icache_access, 0, NULL);
1181

    
1182
  stat_reg_counter(sdb, "total_dcache_access", "total number accesses of dcache", &total_dcache_access, 0, NULL);
1183

    
1184
  stat_reg_counter(sdb, "total_dcache2_access", "total number accesses of dcache2", &total_dcache2_access, 0, NULL);
1185

    
1186
  stat_reg_counter(sdb, "total_alu_access", "total number accesses of alu", &total_alu_access, 0, NULL);
1187

    
1188
  stat_reg_counter(sdb, "total_resultbus_access", "total number accesses of resultbus", &total_resultbus_access, 0, NULL);
1189

    
1190
  stat_reg_formula(sdb, "avg_rename_access", "avg number accesses of rename unit", "total_rename_access/sim_cycle", NULL);
1191

    
1192
  stat_reg_formula(sdb, "avg_bpred_access", "avg number accesses of bpred unit", "total_bpred_access/sim_cycle", NULL);
1193

    
1194
  stat_reg_formula(sdb, "avg_window_access", "avg number accesses of instruction window", "total_window_access/sim_cycle",  NULL);
1195

    
1196
  stat_reg_formula(sdb, "avg_lsq_access", "avg number accesses of lsq", "total_lsq_access/sim_cycle",  NULL);
1197

    
1198
  stat_reg_formula(sdb, "avg_regfile_access", "avg number accesses of arch. regfile", "total_regfile_access/sim_cycle",  NULL);
1199

    
1200
  stat_reg_formula(sdb, "avg_icache_access", "avg number accesses of icache", "total_icache_access/sim_cycle",  NULL);
1201

    
1202
  stat_reg_formula(sdb, "avg_dcache_access", "avg number accesses of dcache", "total_dcache_access/sim_cycle",  NULL);
1203

    
1204
  stat_reg_formula(sdb, "avg_dcache2_access", "avg number accesses of dcache2", "total_dcache2_access/sim_cycle",  NULL);
1205

    
1206
  stat_reg_formula(sdb, "avg_alu_access", "avg number accesses of alu", "total_alu_access/sim_cycle",  NULL);
1207

    
1208
  stat_reg_formula(sdb, "avg_resultbus_access", "avg number accesses of resultbus", "total_resultbus_access/sim_cycle",  NULL);
1209

    
1210
  stat_reg_counter(sdb, "max_rename_access", "max number accesses of rename unit", &max_rename_access, 0, NULL);
1211

    
1212
  stat_reg_counter(sdb, "max_bpred_access", "max number accesses of bpred unit", &max_bpred_access, 0, NULL);
1213

    
1214
  stat_reg_counter(sdb, "max_window_access", "max number accesses of instruction window", &max_window_access, 0, NULL);
1215

    
1216
  stat_reg_counter(sdb, "max_lsq_access", "max number accesses of load/store queue", &max_lsq_access, 0, NULL);
1217

    
1218
  stat_reg_counter(sdb, "max_regfile_access", "max number accesses of arch. regfile", &max_regfile_access, 0, NULL);
1219

    
1220
  stat_reg_counter(sdb, "max_icache_access", "max number accesses of icache", &max_icache_access, 0, NULL);
1221

    
1222
  stat_reg_counter(sdb, "max_dcache_access", "max number accesses of dcache", &max_dcache_access, 0, NULL);
1223

    
1224
  stat_reg_counter(sdb, "max_dcache2_access", "max number accesses of dcache2", &max_dcache2_access, 0, NULL);
1225

    
1226
  stat_reg_counter(sdb, "max_alu_access", "max number accesses of alu", &max_alu_access, 0, NULL);
1227

    
1228
  stat_reg_counter(sdb, "max_resultbus_access", "max number accesses of resultbus", &max_resultbus_access, 0, NULL);
1229

    
1230
  stat_reg_double(sdb, "max_cycle_power_cc1", "maximum cycle power usage of cc1", &max_cycle_power_cc1, 0, NULL);
1231

    
1232
  stat_reg_double(sdb, "max_cycle_power_cc2", "maximum cycle power usage of cc2", &max_cycle_power_cc2, 0, NULL);
1233

    
1234
  stat_reg_double(sdb, "max_cycle_power_cc3", "maximum cycle power usage of cc3", &max_cycle_power_cc3, 0, NULL);
1235
  stat_reg_formula(sdb, "parasitic_power_cc3", "total parasitic power cc3", "(onchip_parasitic_cc3 + offchip_parasitic_cc3)", NULL);
1236
  stat_reg_double(sdb, "onchip parasitic_power_cc3", "onchip parasitic power cc3", &onchip_parasitic_cc3, 0, NULL);
1237
  stat_reg_double(sdb, "offchip parasitic_power_cc3", "offchip parasitic power cc3", &offchip_parasitic_cc3, 0, NULL);
1238
  stat_reg_double(sdb, "min amperage", "min amperage", &min_amp, 0, NULL);
1239
  stat_reg_double(sdb, "max amperage", "max amperage", &max_amp, 0, NULL);
1240
  stat_reg_double(sdb, "slow_cycles", "slow cycles", &slow_cycles, 0, NULL);
1241
  stat_reg_double(sdb, "fast_cycles", "fast cycles", &fast_cycles, 0, NULL);
1242
}
1243

    
1244

    
1245
/* This routine takes the number of rows and cols of an array structure
   and attempts to make it more of a reasonable circuit structure
   by trying to make the number of rows and cols as close as possible
   (scaling both by factors of 2 in opposite directions).  It returns
   a scale factor which is the amount that the rows should be divided
   by and the columns should be multiplied by.

   Only the rows > cols case is scaled; when rows <= cols the result is 1.
   The factor is always a power of two and is computed with integer
   arithmetic only, so no floating-point rounding can affect it.
*/
int squarify(int rows, int cols)
{
  int factor = 2;   /* 2^1: the factor in effect after the first step */

  if (rows == cols)
    return 1;

  while (rows > cols) {
    rows /= 2;
    cols *= 2;

    /* stop once one further halving would bring rows to or below cols */
    if (rows / 2 <= cols)
      return factor;

    factor *= 2;
  }

  /* reached only when rows < cols on entry: no scaling is attempted */
  return 1;
}
1281

    
1282
/* could improve squarify to work when rows < cols */
1283

    
1284
/*
 * squarify_new: two-directional variant of squarify.
 *
 * Repeatedly halves the larger of rows/cols and doubles the smaller until
 * they meet or cross, and returns a power-of-two factor as a double:
 *   rows == cols : 1.0
 *   rows >  cols : 2^(k-1), where k is the number of steps taken
 *   rows <  cols : 2^-(k-1), where k is the number of steps taken
 *
 * NOTE(review): the exponent is one less than the number of steps taken,
 * so e.g. (8,2) yields 1.0 rather than 2.0.  Kept as-is; confirm this is
 * what callers expect.
 */
double squarify_new(int rows, int cols)
{
  double factor = 1.0;   /* running power of two, starts at 2^0 */

  if (rows == cols)
    return factor;

  /* more rows than columns: fold rows into columns */
  for (; rows > cols; factor *= 2.0) {
    rows /= 2;
    cols *= 2;
    if (rows <= cols)
      return factor;
  }

  /* more columns than rows: fold columns into rows */
  for (; cols > rows; factor *= 0.5) {
    rows *= 2;
    cols /= 2;
    if (cols <= rows)
      return factor;
  }

  return 1.0;
}
1310

    
1311
void dump_power_stats(power)
1312
     power_result_type *power;
1313
{
1314
  double total_power;
1315
  double bpred_power;
1316
  double rename_power;
1317
  double rat_power;
1318
  double dcl_power;
1319
  double lsq_power;
1320
  double window_power;
1321
  double wakeup_power;
1322
  double rs_power;
1323
  double lsq_wakeup_power;
1324
  double lsq_rs_power;
1325
  double regfile_power;
1326
  double reorder_power;
1327
  double icache_power;
1328
  double dcache_power;
1329
  double dcache2_power;
1330
  double dtlb_power;
1331
  double itlb_power;
1332
  double ambient_power = 2.0;
1333

    
1334
  icache_power = power->icache_power;
1335

    
1336
  dcache_power = power->dcache_power;
1337

    
1338
  dcache2_power = power->dcache2_power;
1339

    
1340
  itlb_power = power->itlb;
1341
  dtlb_power = power->dtlb;
1342

    
1343
  bpred_power = power->btb + power->local_predict + power->global_predict + 
1344
    power->chooser + power->ras;
1345

    
1346
  rat_power = power->rat_decoder + 
1347
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
1348

    
1349
  dcl_power = power->dcl_compare + power->dcl_pencode;
1350

    
1351
  rename_power = power->rat_power + power->dcl_power + power->inst_decoder_power;
1352

    
1353
  wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
1354
    power->wakeup_ormatch;
1355
   
1356
  rs_power = power->rs_decoder + 
1357
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
1358

    
1359
  window_power = wakeup_power + rs_power + power->selection;
1360

    
1361
  lsq_rs_power = power->lsq_rs_decoder + 
1362
    power->lsq_rs_wordline + power->lsq_rs_bitline + power->lsq_rs_senseamp;
1363

    
1364
  lsq_wakeup_power = power->lsq_wakeup_tagdrive + 
1365
    power->lsq_wakeup_tagmatch + power->lsq_wakeup_ormatch;
1366

    
1367
  lsq_power = lsq_wakeup_power + lsq_rs_power;
1368

    
1369
  reorder_power = power->reorder_decoder + 
1370
    power->reorder_wordline + power->reorder_bitline + 
1371
    power->reorder_senseamp;
1372

    
1373
  regfile_power = power->regfile_decoder + 
1374
    power->regfile_wordline + power->regfile_bitline + 
1375
    power->regfile_senseamp;
1376

    
1377
  total_power = bpred_power + rename_power + window_power + regfile_power +
1378
    power->resultbus + lsq_power + 
1379
    icache_power + dcache_power + dcache2_power + 
1380
    dtlb_power + itlb_power + power->clock_power + power->ialu_power +
1381
    power->falu_power;
1382

    
1383
  fprintf(stderr,"\nProcessor Parameters:\n");
1384
  fprintf(stderr,"Issue Width: %d\n",ruu_issue_width);
1385
  fprintf(stderr,"Window Size: %d\n",RUU_size);
1386
  fprintf(stderr,"Number of Virtual Registers: %d\n",MD_NUM_IREGS);
1387
  fprintf(stderr,"Number of Physical Registers: %d\n",RUU_size);
1388
  fprintf(stderr,"Datapath Width: %d\n",data_width);
1389

    
1390
  fprintf(stderr,"Total Power Consumption: %g\n",total_power+ambient_power);
1391
  fprintf(stderr,"Branch Predictor Power Consumption: %g  (%.3g%%)\n",bpred_power,100*bpred_power/total_power);
1392
  fprintf(stderr," branch target buffer power (W): %g\n",power->btb);
1393
  fprintf(stderr," local predict power (W): %g\n",power->local_predict);
1394
  fprintf(stderr," global predict power (W): %g\n",power->global_predict);
1395
  fprintf(stderr," chooser power (W): %g\n",power->chooser);
1396
  fprintf(stderr," RAS power (W): %g\n",power->ras);
1397
  fprintf(stderr,"Rename Logic Power Consumption: %g  (%.3g%%)\n",rename_power,100*rename_power/total_power);
1398
  fprintf(stderr," Instruction Decode Power (W): %g\n",power->inst_decoder_power);
1399
  fprintf(stderr," RAT decode_power (W): %g\n",power->rat_decoder);
1400
  fprintf(stderr," RAT wordline_power (W): %g\n",power->rat_wordline);
1401
  fprintf(stderr," RAT bitline_power (W): %g\n",power->rat_bitline);
1402
  fprintf(stderr," DCL Comparators (W): %g\n",power->dcl_compare);
1403
  fprintf(stderr,"Instruction Window Power Consumption: %g  (%.3g%%)\n",window_power,100*window_power/total_power);
1404
  fprintf(stderr," tagdrive (W): %g\n",power->wakeup_tagdrive);
1405
  fprintf(stderr," tagmatch (W): %g\n",power->wakeup_tagmatch);
1406
  fprintf(stderr," Selection Logic (W): %g\n",power->selection);
1407
  fprintf(stderr," decode_power (W): %g\n",power->rs_decoder);
1408
  fprintf(stderr," wordline_power (W): %g\n",power->rs_wordline);
1409
  fprintf(stderr," bitline_power (W): %g\n",power->rs_bitline);
1410
  fprintf(stderr,"Load/Store Queue Power Consumption: %g  (%.3g%%)\n",lsq_power,100*lsq_power/total_power);
1411
  fprintf(stderr," tagdrive (W): %g\n",power->lsq_wakeup_tagdrive);
1412
  fprintf(stderr," tagmatch (W): %g\n",power->lsq_wakeup_tagmatch);
1413
  fprintf(stderr," decode_power (W): %g\n",power->lsq_rs_decoder);
1414
  fprintf(stderr," wordline_power (W): %g\n",power->lsq_rs_wordline);
1415
  fprintf(stderr," bitline_power (W): %g\n",power->lsq_rs_bitline);
1416
  fprintf(stderr,"Arch. Register File Power Consumption: %g  (%.3g%%)\n",regfile_power,100*regfile_power/total_power);
1417
  fprintf(stderr," decode_power (W): %g\n",power->regfile_decoder);
1418
  fprintf(stderr," wordline_power (W): %g\n",power->regfile_wordline);
1419
  fprintf(stderr," bitline_power (W): %g\n",power->regfile_bitline);
1420
  fprintf(stderr,"Result Bus Power Consumption: %g  (%.3g%%)\n",power->resultbus,100*power->resultbus/total_power);
1421
  fprintf(stderr,"Total Clock Power: %g  (%.3g%%)\n",power->clock_power,100*power->clock_power/total_power);
1422
  fprintf(stderr,"Int ALU Power: %g  (%.3g%%)\n",power->ialu_power,100*power->ialu_power/total_power);
1423
  fprintf(stderr,"FP ALU Power: %g  (%.3g%%)\n",power->falu_power,100*power->falu_power/total_power);
1424
  fprintf(stderr,"Instruction Cache Power Consumption: %g  (%.3g%%)\n",icache_power,100*icache_power/total_power);
1425
  fprintf(stderr," decode_power (W): %g\n",power->icache_decoder);
1426
  fprintf(stderr," wordline_power (W): %g\n",power->icache_wordline);
1427
  fprintf(stderr," bitline_power (W): %g\n",power->icache_bitline);
1428
  fprintf(stderr," senseamp_power (W): %g\n",power->icache_senseamp);
1429
  fprintf(stderr," tagarray_power (W): %g\n",power->icache_tagarray);
1430
  fprintf(stderr,"Itlb_power (W): %g (%.3g%%)\n",power->itlb,100*power->itlb/total_power);
1431
  fprintf(stderr,"Data Cache Power Consumption: %g  (%.3g%%)\n",dcache_power,100*dcache_power/total_power);
1432
  fprintf(stderr," decode_power (W): %g\n",power->dcache_decoder);
1433
  fprintf(stderr," wordline_power (W): %g\n",power->dcache_wordline);
1434
  fprintf(stderr," bitline_power (W): %g\n",power->dcache_bitline);
1435
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache_senseamp);
1436
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache_tagarray);
1437
  fprintf(stderr,"Dtlb_power (W): %g (%.3g%%)\n",power->dtlb,100*power->dtlb/total_power);
1438
  fprintf(stderr,"Level 2 Cache Power Consumption: %g (%.3g%%)\n",dcache2_power,100*dcache2_power/total_power);
1439
  fprintf(stderr," decode_power (W): %g\n",power->dcache2_decoder);
1440
  fprintf(stderr," wordline_power (W): %g\n",power->dcache2_wordline);
1441
  fprintf(stderr," bitline_power (W): %g\n",power->dcache2_bitline);
1442
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache2_senseamp);
1443
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache2_tagarray);
1444
}
1445

    
1446
/*======================================================================*/
1447

    
1448

    
1449

    
1450
/* 
1451
 * This part of the code contains routines for each section as
1452
 * described in the tech report.  See the tech report for more details
1453
 * and explanations */
1454

    
1455
/*----------------------------------------------------------------------*/
1456

    
1457
double driver_size(double driving_cap, double desiredrisetime) {
1458
  double nsize, psize;
1459
  double Rpdrive; 
1460

    
1461
  Rpdrive = desiredrisetime/(driving_cap*log(VSINV)*-1.0);
1462
  psize = restowidth(Rpdrive,PCH);
1463
  nsize = restowidth(Rpdrive,NCH);
1464
  if (psize > Wworddrivemax) {
1465
    psize = Wworddrivemax;
1466
  }
1467
  if (psize < 4.0 * LSCALE)
1468
    psize = 4.0 * LSCALE;
1469

    
1470
  return (psize);
1471

    
1472
}
1473

    
1474
/* Decoder delay:  (see section 6.1 of tech report) */
1475

    
1476
double array_decoder_power(rows,cols,predeclength,rports,wports,cache)
1477
     int rows,cols;
1478
     double predeclength;
1479
     int rports,wports;
1480
     int cache;
1481
{
1482
  double Ctotal=0;
1483
  double Ceq=0;
1484
  int numstack;
1485
  int decode_bits=0;
1486
  int ports;
1487
  double rowsb;
1488

    
1489
  /* read and write ports are the same here */
1490
  ports = rports + wports;
1491

    
1492
  rowsb = (double)rows;
1493

    
1494
  /* number of input bits to be decoded */
1495
  decode_bits=ceil((logtwo(rowsb)));
1496

    
1497
  /* First stage: driving the decoders */
1498

    
1499
  /* This is the capacitance for driving one bit (and its complement).
1500
     -There are #rowsb 3->8 decoders contributing gatecap.
1501
     - 2.0 factor from 2 identical sets of drivers in parallel
1502
  */
1503
  Ceq = 2.0*(draincap(Wdecdrivep,PCH,1)+draincap(Wdecdriven,NCH,1)) +
1504
    gatecap(Wdec3to8n+Wdec3to8p,10.0)*rowsb;
1505

    
1506
  /* There are ports * #decode_bits total */
1507
  Ctotal+=ports*decode_bits*Ceq;
1508

    
1509
  if(verbose)
1510
    fprintf(stderr,"Decoder -- Driving decoders            == %g\n",.3*Ctotal*Powerfactor);
1511

    
1512
  /* second stage: driving a bunch of nor gates with a nand 
1513
     numstack is the size of the nor gates -- ie. a 7-128 decoder has
1514
     3-input NAND followed by 3-input NOR  */
1515

    
1516
  numstack = ceil((1.0/3.0)*logtwo(rows));
1517

    
1518
  if (numstack<=0) numstack = 1;
1519
  if (numstack>5) numstack = 5;
1520

    
1521
  /* There are #rowsb NOR gates being driven*/
1522
  Ceq = (3.0*draincap(Wdec3to8p,PCH,1) +draincap(Wdec3to8n,NCH,3) +
1523
         gatecap(WdecNORn+WdecNORp,((numstack*40)+20.0)))*rowsb;
1524

    
1525
  Ctotal+=ports*Ceq;
1526

    
1527
  if(verbose)
1528
    fprintf(stderr,"Decoder -- Driving nor w/ nand         == %g\n",.3*ports*Ceq*Powerfactor);
1529

    
1530
  /* Final stage: driving an inverter with the nor 
1531
     (inverter preceding wordline driver) -- wordline driver is in the next section*/
1532

    
1533
  Ceq = (gatecap(Wdecinvn+Wdecinvp,20.0)+
1534
         numstack*draincap(WdecNORn,NCH,1)+
1535
         draincap(WdecNORp,PCH,numstack));
1536

    
1537
  if(verbose)
1538
    fprintf(stderr,"Decoder -- Driving inverter w/ nor     == %g\n",.3*ports*Ceq*Powerfactor);
1539

    
1540
  Ctotal+=ports*Ceq;
1541

    
1542
  /* assume Activity Factor == .3  */
1543

    
1544
  return(.3*Ctotal*Powerfactor);
1545
}
1546

    
1547
/*
 * Convenience wrapper around array_decoder_power() for callers that have
 * no predecode line length to supply: it forwards every argument unchanged
 * and passes 0.0 for predeclength.
 */
double simple_array_decoder_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  return(array_decoder_power(rows,cols,0.0,rports,wports,cache));
}
1555

    
1556

    
1557
double array_wordline_power(rows,cols,wordlinelength,rports,wports,cache)
1558
     int rows,cols;
1559
     double wordlinelength;
1560
     int rports,wports;
1561
     int cache;
1562
{
1563
  double Ctotal=0;
1564
  double Ceq=0;
1565
  double Cline=0;
1566
  double Cliner, Clinew=0;
1567
  double desiredrisetime,psize,nsize;
1568
  int ports;
1569
  double colsb;
1570

    
1571
  ports = rports+wports;
1572

    
1573
  colsb = (double)cols;
1574

    
1575
  /* Calculate size of wordline drivers assuming rise time == Period / 8 
1576
     - estimate cap on line 
1577
     - compute min resistance to achieve this with RC 
1578
     - compute width needed to achieve this resistance */
1579

    
1580
  desiredrisetime = Period/16;
1581
  Cline = (gatecappass(Wmemcellr,1.0))*colsb + wordlinelength*CM3metal;
1582
  psize = driver_size(Cline,desiredrisetime);
1583
  
1584
  /* how do we want to do p-n ratioing? -- here we just assume the same ratio 
1585
     from an inverter pair  */
1586
  nsize = psize * Wdecinvn/Wdecinvp; 
1587
  
1588
  if(verbose)
1589
    fprintf(stderr,"Wordline Driver Sizes -- nsize == %f, psize == %f\n",nsize,psize);
1590

    
1591
  Ceq = draincap(Wdecinvn,NCH,1) + draincap(Wdecinvp,PCH,1) +
1592
    gatecap(nsize+psize,20.0);
1593

    
1594
  Ctotal+=ports*Ceq;
1595

    
1596
  if(verbose)
1597
    fprintf(stderr,"Wordline -- Inverter -> Driver         == %g\n",ports*Ceq*Powerfactor);
1598

    
1599
  /* Compute caps of read wordline and write wordlines 
1600
     - wordline driver caps, given computed width from above
1601
     - read wordlines have 1 nmos access tx, size ~4
1602
     - write wordlines have 2 nmos access tx, size ~2
1603
     - metal line cap
1604
  */
1605

    
1606
  Cliner = (gatecappass(Wmemcellr,(BitWidth-2*Wmemcellr)/2.0))*colsb+
1607
    wordlinelength*CM3metal+
1608
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1609
  Clinew = (2.0*gatecappass(Wmemcellw,(BitWidth-2*Wmemcellw)/2.0))*colsb+
1610
    wordlinelength*CM3metal+
1611
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1612

    
1613
  if(verbose) {
1614
    fprintf(stderr,"Wordline -- Line                       == %g\n",1e12*Cline);
1615
    fprintf(stderr,"Wordline -- Line -- access -- gatecap  == %g\n",1e12*colsb*2*gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0));
1616
    fprintf(stderr,"Wordline -- Line -- driver -- draincap == %g\n",1e12*draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1617
    fprintf(stderr,"Wordline -- Line -- metal              == %g\n",1e12*wordlinelength*CM3metal);
1618
  }
1619
  Ctotal+=rports*Cliner+wports*Clinew;
1620

    
1621
  /* AF == 1 assuming a different wordline is charged each cycle, but only
1622
     1 wordline (per port) is actually used */
1623

    
1624
  return(Ctotal*Powerfactor);
1625
}
1626

    
1627
double simple_array_wordline_power(rows,cols,rports,wports,cache)
1628
     int rows,cols;
1629
     int rports,wports;
1630
     int cache;
1631
{
1632
  double wordlinelength;
1633
  int ports = rports + wports;
1634
  wordlinelength = cols *  (RegCellWidth + 2 * ports * BitlineSpacing);
1635
  return(array_wordline_power(rows,cols,wordlinelength,rports,wports,cache));
1636
}
1637

    
1638

    
1639
double array_bitline_power(rows,cols,bitlinelength,rports,wports,cache)
1640
     int rows,cols;
1641
     double bitlinelength;
1642
     int rports,wports;
1643
     int cache;
1644
{
1645
  double Ctotal=0;
1646
  double Ccolmux=0;
1647
  double Cbitrowr=0;
1648
  double Cbitroww=0;
1649
  double Cprerow=0;
1650
  double Cwritebitdrive=0;
1651
  double Cpregate=0;
1652
  double Cliner=0;
1653
  double Clinew=0;
1654
  int ports;
1655
  double rowsb;
1656
  double colsb;
1657

    
1658
  double desiredrisetime, Cline, psize, nsize;
1659

    
1660
  ports = rports + wports;
1661

    
1662
  rowsb = (double)rows;
1663
  colsb = (double)cols;
1664

    
1665
  /* Draincaps of access tx's */
1666

    
1667
  Cbitrowr = draincap(Wmemcellr,NCH,1);
1668
  Cbitroww = draincap(Wmemcellw,NCH,1);
1669

    
1670
  /* Cprerow -- precharge cap on the bitline
1671
     -simple scheme to estimate size of pre-charge tx's in a similar fashion
1672
      to wordline driver size estimation.
1673
     -FIXME: it would be better to use precharge/keeper pairs, i've omitted this
1674
      from this version because it couldn't autosize as easily.
1675
  */
1676

    
1677
  desiredrisetime = Period/8;
1678

    
1679
  Cline = rowsb*Cbitrowr+CM2metal*bitlinelength;
1680
  psize = driver_size(Cline,desiredrisetime);
1681

    
1682
  /* compensate for not having an nmos pre-charging */
1683
  psize = psize + psize * Wdecinvn/Wdecinvp; 
1684

    
1685
  if(verbose)
1686
    printf("Cprerow auto   == %g (psize == %g)\n",draincap(psize,PCH,1),psize);
1687

    
1688
  Cprerow = draincap(psize,PCH,1);
1689

    
1690
  /* Cpregate -- cap due to gatecap of precharge transistors -- tack this
1691
     onto bitline cap, again this could have a keeper */
1692
  Cpregate = 4.0*gatecap(psize,10.0);
1693
  global_clockcap+=rports*cols*2.0*Cpregate;
1694

    
1695
  /* Cwritebitdrive -- write bitline drivers are used instead of the precharge
1696
     stuff for write bitlines
1697
     - 2 inverter drivers within each driver pair */
1698

    
1699
  Cline = rowsb*Cbitroww+CM2metal*bitlinelength;
1700

    
1701
  psize = driver_size(Cline,desiredrisetime);
1702
  nsize = psize * Wdecinvn/Wdecinvp; 
1703

    
1704
  Cwritebitdrive = 2.0*(draincap(psize,PCH,1)+draincap(nsize,NCH,1));
1705

    
1706
  /* 
1707
     reg files (cache==0) 
1708
     => single ended bitlines (1 bitline/col)
1709
     => AFs from pop_count
1710
     caches (cache ==1)
1711
     => double-ended bitlines (2 bitlines/col)
1712
     => AFs = .5 (since one of the two bitlines is always charging/discharging)
1713
  */
1714

    
1715
#ifdef STATIC_AF
1716
  if (cache == 0) {
1717
    /* compute the total line cap for read/write bitlines */
1718
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1719
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1720

    
1721
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1722
       in cache styles) */
1723
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1724
    Ctotal+=(1.0-POPCOUNT_AF)*rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1725
    Ctotal+=.3*wports*cols*(Clinew+Cwritebitdrive);
1726
  } 
1727
  else { 
1728
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1729
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1730
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1731
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1732
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1733
  }
1734
#else
1735
  if (cache == 0) {
1736
    /* compute the total line cap for read/write bitlines */
1737
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1738
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1739

    
1740
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1741
       in cache styles) */
1742
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1743
    Ctotal += rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1744
    Ctotal += .3*wports*cols*(Clinew+Cwritebitdrive);
1745
  } 
1746
  else { 
1747
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1748
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1749
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1750
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1751
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1752
  }
1753
#endif
1754

    
1755
  if(verbose) {
1756
    fprintf(stderr,"Bitline -- Precharge                   == %g\n",1e12*Cpregate);
1757
    fprintf(stderr,"Bitline -- Line                        == %g\n",1e12*(Cliner+Clinew));
1758
    fprintf(stderr,"Bitline -- Line -- access draincap     == %g\n",1e12*rowsb*Cbitrowr);
1759
    fprintf(stderr,"Bitline -- Line -- precharge draincap  == %g\n",1e12*Cprerow);
1760
    fprintf(stderr,"Bitline -- Line -- metal               == %g\n",1e12*bitlinelength*CM2metal);
1761
    fprintf(stderr,"Bitline -- Colmux                      == %g\n",1e12*Ccolmux);
1762

    
1763
    fprintf(stderr,"\n");
1764
  }
1765

    
1766

    
1767
  if(cache==0)
1768
    return(Ctotal*Powerfactor);
1769
  else
1770
    return(Ctotal*SensePowerfactor*.4);
1771
  
1772
}
1773

    
1774

    
1775
double simple_array_bitline_power(rows,cols,rports,wports,cache)
1776
     int rows,cols;
1777
     int rports,wports;
1778
     int cache;
1779
{
1780
  double bitlinelength;
1781

    
1782
  int ports = rports + wports;
1783

    
1784
  bitlinelength = rows * (RegCellHeight + ports * WordlineSpacing);
1785

    
1786
  return (array_bitline_power(rows,cols,bitlinelength,rports,wports,cache));
1787

    
1788
}
1789

    
1790
/* estimate senseamp power dissipation in cache structures (Zyuban's method) */
1791
double senseamp_power(int cols)
1792
{
1793
  return((double)cols * Vdd/8 * .5e-3);
1794
}
1795

    
1796
/* estimate comparator power consumption (this comparator is similar
1797
   to the tag-match structure in a CAM */
1798
double compare_cap(int compare_bits)
1799
{
1800
  double c1, c2;
1801
  /* bottom part of comparator */
1802
  c2 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2))+
1803
    draincap(Wevalinvp,PCH,1) + draincap(Wevalinvn,NCH,1);
1804

    
1805
  /* top part of comparator */
1806
  c1 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2)+
1807
                       draincap(Wcomppreequ,NCH,1)) +
1808
    gatecap(WdecNORn,1.0)+
1809
    gatecap(WdecNORp,3.0);
1810

    
1811
  return(c1 + c2);
1812
}
1813

    
1814
/* power of depency check logic */
1815
double dcl_compare_power(int compare_bits)
1816
{
1817
  double Ctotal;
1818
  int num_comparators;
1819
  
1820
  num_comparators = (ruu_decode_width - 1) * (ruu_decode_width);
1821

    
1822
  Ctotal = num_comparators * compare_cap(compare_bits);
1823

    
1824
  return(Ctotal*Powerfactor*AF);
1825
}
1826

    
1827
/*
 * Total power of a simple array: decoder + wordline + bitline power, each
 * obtained from the corresponding simple_array_*_power() routine.  For
 * cache-style arrays (cache != 0) the senseamp power for `cols` columns is
 * added on top.
 */
double simple_array_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  double array_total;

  array_total = simple_array_decoder_power(rows,cols,rports,wports,cache)+
    simple_array_wordline_power(rows,cols,rports,wports,cache)+
    simple_array_bitline_power(rows,cols,rports,wports,cache);

  /* only cache-style arrays have sense amplifiers */
  if(cache!=0)
    array_total = array_total + senseamp_power(cols);

  return(array_total);
}
1842

    
1843

    
1844
double cam_tagdrive(rows,cols,rports,wports)
1845
     int rows,cols,rports,wports;
1846
{
1847
  double Ctotal, Ctlcap, Cblcap, Cwlcap;
1848
  double taglinelength;
1849
  double wordlinelength;
1850
  double nsize, psize;
1851
  int ports;
1852
  Ctotal=0;
1853

    
1854
  ports = rports + wports;
1855

    
1856
  taglinelength = rows * 
1857
    (CamCellHeight + ports * MatchlineSpacing);
1858

    
1859
  wordlinelength = cols * 
1860
    (CamCellWidth + ports * TaglineSpacing);
1861

    
1862
  /* Compute tagline cap */
1863
  Ctlcap = Cmetal * taglinelength + 
1864
    rows * gatecappass(Wcomparen2,2.0) +
1865
    draincap(Wcompdrivern,NCH,1)+draincap(Wcompdriverp,PCH,1);
1866

    
1867
  /* Compute bitline cap (for writing new tags) */
1868
  Cblcap = Cmetal * taglinelength +
1869
    rows * draincap(Wmemcellr,NCH,2);
1870

    
1871
  /* autosize wordline driver */
1872
  psize = driver_size(Cmetal * wordlinelength + 2 * cols * gatecap(Wmemcellr,2.0),Period/8);
1873
  nsize = psize * Wdecinvn/Wdecinvp; 
1874

    
1875
  /* Compute wordline cap (for writing new tags) */
1876
  Cwlcap = Cmetal * wordlinelength + 
1877
    draincap(nsize,NCH,1)+draincap(psize,PCH,1) +
1878
    2 * cols * gatecap(Wmemcellr,2.0);
1879
    
1880
  Ctotal += (rports * cols * 2 * Ctlcap) + 
1881
    (wports * ((cols * 2 * Cblcap) + (rows * Cwlcap)));
1882

    
1883
  return(Ctotal*Powerfactor*AF);
1884
}
1885

    
1886
double cam_tagmatch(rows,cols,rports,wports)
1887
     int rows,cols,rports,wports;
1888
{
1889
  double Ctotal, Cmlcap;
1890
  double matchlinelength;
1891
  int ports;
1892
  Ctotal=0;
1893

    
1894
  ports = rports + wports;
1895

    
1896
  matchlinelength = cols * 
1897
    (CamCellWidth + ports * TaglineSpacing);
1898

    
1899
  Cmlcap = 2 * cols * draincap(Wcomparen1,NCH,2) + 
1900
    Cmetal * matchlinelength + draincap(Wmatchpchg,NCH,1) +
1901
    gatecap(Wmatchinvn+Wmatchinvp,10.0) +
1902
    gatecap(Wmatchnandn+Wmatchnandp,10.0);
1903

    
1904
  Ctotal += rports * rows * Cmlcap;
1905

    
1906
  global_clockcap += rports * rows * gatecap(Wmatchpchg,5.0);
1907
  
1908
  /* noring the nanded match lines */
1909
  if(ruu_issue_width >= 8)
1910
    Ctotal += 2 * gatecap(Wmatchnorn+Wmatchnorp,10.0);
1911

    
1912
  return(Ctotal*Powerfactor*AF);
1913
}
1914

    
1915
/*
 * Total CAM power: tag-drive power plus tag-match power for the same
 * geometry and port counts.
 */
double cam_array(rows,cols,rports,wports)
     int rows,cols,rports,wports;
{
  double drive_power;
  double match_power;

  drive_power = cam_tagdrive(rows,cols,rports,wports);
  match_power = cam_tagmatch(rows,cols,rports,wports);

  return(drive_power + match_power);
}
1921

    
1922

    
1923
double selection_power(int win_entries)
1924
{
1925
  double Ctotal, Cor, Cpencode;
1926
  int num_arbiter=1;
1927

    
1928
  Ctotal=0;
1929

    
1930
  while(win_entries > 4)
1931
    {
1932
      win_entries = (int)ceil((double)win_entries / 4.0);
1933
      num_arbiter += win_entries;
1934
    }
1935

    
1936
  Cor = 4 * draincap(WSelORn,NCH,1) + draincap(WSelORprequ,PCH,1);
1937

    
1938
  Cpencode = draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,1) + 
1939
    2*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,2) + 
1940
    3*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,3) + 
1941
    4*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,4) + 
1942
    4*gatecap(WSelEnn+WSelEnp,20.0) + 
1943
    4*draincap(WSelEnn,NCH,1) + 4*draincap(WSelEnp,PCH,1);
1944

    
1945
  Ctotal += ruu_issue_width * num_arbiter*(Cor+Cpencode);
1946

    
1947
  return(Ctotal*Powerfactor*AF);
1948
}
1949

    
1950
/* very rough clock power estimates */
1951
double total_clockpower(double die_length)
1952
{
1953

    
1954
  double clocklinelength;
1955
  double Cline,Cline2,Ctotal;
1956
  double pipereg_clockcap=0;
1957
  double global_buffercap = 0;
1958
  double Clockpower;
1959

    
1960
  double num_piperegs;
1961

    
1962
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
1963

    
1964
  /* Assume say 8 stages (kinda low now).
1965
     FIXME: this could be a lot better; user could input
1966
     number of pipestages, etc  */
1967

    
1968
  /* assume 8 pipe stages and try to estimate bits per pipe stage */
1969
  /* pipe stage 0/1 */
1970
  num_piperegs = ruu_issue_width*inst_length + data_width;
1971
  /* pipe stage 1/2 */
1972
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1973
  /* pipe stage 2/3 */
1974
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1975
  /* pipe stage 3/4 */
1976
  num_piperegs += ruu_issue_width*(3 * npreg_width + pow2(opcode_length));
1977
  /* pipe stage 4/5 */
1978
  num_piperegs += ruu_issue_width*(2*data_width + pow2(opcode_length));
1979
  /* pipe stage 5/6 */
1980
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1981
  /* pipe stage 6/7 */
1982
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1983
  /* pipe stage 7/8 */
1984
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1985

    
1986
  /* assume 50% extra in control signals (rule of thumb) */
1987
  num_piperegs = num_piperegs * 1.5;
1988

    
1989
  pipereg_clockcap = num_piperegs * 4*gatecap(10.0,0);
1990

    
1991
  /* estimate based on 3% of die being in clock metal */
1992
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
1993

    
1994
  /* another estimate */
1995
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
1996
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
1997
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
1998
  /* global_clockcap is computed within each array structure for pre-charge tx's*/
1999
  Ctotal = Cline+global_clockcap+pipereg_clockcap+global_buffercap;
2000

    
2001
  if(verbose)
2002
    fprintf(stderr,"num_piperegs == %f\n",num_piperegs);
2003

    
2004
  /* add I_ADD Clockcap and F_ADD Clockcap */
2005
  Clockpower = Ctotal*Powerfactor + res_ialu*I_ADD_CLOCK + res_fpalu*F_ADD_CLOCK;
2006

    
2007
  if(verbose) {
2008
    fprintf(stderr,"Global Clock Power: %g\n",Clockpower);
2009
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
2010
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
2011
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
2012
    fprintf(stderr," Global Clock Cap (Explicit) (W): %g\n",global_clockcap*Powerfactor+I_ADD_CLOCK+F_ADD_CLOCK);
2013
    fprintf(stderr," Global Clock Cap (Implicit) (W): %g\n",pipereg_clockcap*Powerfactor);
2014
  }
2015
  return(Clockpower);
2016

    
2017
}
2018

    
2019
/* very rough global clock power estimates */
2020
double global_clockpower(double die_length)
2021
{
2022

    
2023
  double clocklinelength;
2024
  double Cline,Cline2,Ctotal;
2025
  double global_buffercap = 0;
2026

    
2027
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
2028

    
2029
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
2030
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
2031
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
2032
  Ctotal = Cline+global_buffercap;
2033

    
2034
  if(verbose) {
2035
    fprintf(stderr,"Global Clock Power: %g\n",Ctotal*Powerfactor);
2036
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
2037
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
2038
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
2039
  }
2040

    
2041
  return(Ctotal*Powerfactor);
2042

    
2043
}
2044

    
2045

    
2046
double compute_resultbus_power()
2047
{
2048
  double Ctotal, Cline;
2049

    
2050
  double regfile_height;
2051

    
2052
  /* compute size of result bus tags */
2053
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2054

    
2055
  Ctotal=0;
2056

    
2057
  regfile_height = RUU_size * (RegCellHeight + 
2058
                               WordlineSpacing * 3 * ruu_issue_width); 
2059

    
2060
  /* assume num alu's == ialu  (FIXME: generate a more detailed result bus network model*/
2061
  Cline = Cmetal * (regfile_height + .5 * res_ialu * 3200.0 * LSCALE);
2062

    
2063
  /* or use result bus length measured from 21264 die photo */
2064
  /*  Cline = Cmetal * 3.3*1000;*/
2065

    
2066
  /* Assume ruu_issue_width result busses -- power can be scaled linearly
2067
     for number of result busses (scale by writeback_access) */
2068
  Ctotal += 2.0 * (data_width + npreg_width) * (ruu_issue_width)* Cline;
2069

    
2070
#ifdef STATIC_AF
2071
  return(Ctotal*Powerfactor*AF);
2072
#else
2073
  return(Ctotal*Powerfactor);
2074
#endif
2075
  
2076
}
2077

    
2078
void calculate_power(power)
2079
     power_result_type *power;
2080
{
2081
  double clockpower;
2082
  double predeclength, wordlinelength, bitlinelength;
2083
  int ndwl, ndbl, nspd, ntwl, ntbl, ntspd, c,b,a,cache, rowsb, colsb;
2084
  int trowsb, tcolsb, tagsize;
2085
  int va_size = 48;
2086

    
2087
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2088

    
2089
  /* these variables are needed to use Cacti to auto-size cache arrays 
2090
     (for optimal delay) */
2091
  time_result_type time_result;
2092
  time_parameter_type time_parameters;
2093

    
2094
  /* used to autosize other structures, like bpred tables */
2095
  int scale_factor;
2096

    
2097
  global_clockcap = 0;
2098

    
2099
  cache=0;
2100

    
2101

    
2102
  /* FIXME: ALU power is a simple constant, it would be better
2103
     to include bit AFs and have different numbers for different
2104
     types of operations */
2105
  power->ialu_power = res_ialu * I_ADD;
2106
  power->falu_power = res_fpalu * F_ADD;
2107

    
2108
  nvreg_width = (int)ceil(logtwo((double)MD_NUM_IREGS));
2109
  npreg_width = (int)ceil(logtwo((double)RUU_size));
2110

    
2111

    
2112
  /* RAT has shadow bits stored in each cell, this makes the
2113
     cell size larger than normal array structures, so we must
2114
     compute it here */
2115

    
2116
  predeclength = MD_NUM_IREGS * 
2117
    (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2118

    
2119
  wordlinelength = npreg_width * 
2120
    (RatCellWidth + 
2121
     6 * ruu_decode_width * BitlineSpacing + 
2122
     RatShiftRegWidth*RatNumShift);
2123

    
2124
  bitlinelength = MD_NUM_IREGS * (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2125

    
2126
  if(verbose)
2127
    fprintf(stderr,"rat power stats\n");
2128
  power->rat_decoder = array_decoder_power(MD_NUM_IREGS,npreg_width,predeclength,2*ruu_decode_width,ruu_decode_width,cache);
2129
  power->rat_wordline = array_wordline_power(MD_NUM_IREGS,npreg_width,wordlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2130
  power->rat_bitline = array_bitline_power(MD_NUM_IREGS,npreg_width,bitlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2131
  power->rat_senseamp = 0;
2132

    
2133
  power->dcl_compare = dcl_compare_power(nvreg_width);
2134
  power->dcl_pencode = 0;
2135
  power->inst_decoder_power = ruu_decode_width * simple_array_decoder_power(opcode_length,1,1,1,cache);
2136
  power->wakeup_tagdrive =cam_tagdrive(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2137
  power->wakeup_tagmatch =cam_tagmatch(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2138
  power->wakeup_ormatch =0; 
2139

    
2140
  power->selection = selection_power(RUU_size);
2141

    
2142

    
2143
  predeclength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2144

    
2145
  wordlinelength = data_width * 
2146
    (RegCellWidth + 
2147
     6 * ruu_issue_width * BitlineSpacing);
2148

    
2149
  bitlinelength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2150

    
2151
  if(verbose)
2152
    fprintf(stderr,"regfile power stats\n");
2153

    
2154
  power->regfile_decoder = array_decoder_power(MD_NUM_IREGS,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2155
  power->regfile_wordline = array_wordline_power(MD_NUM_IREGS,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2156
  power->regfile_bitline = array_bitline_power(MD_NUM_IREGS,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2157
  power->regfile_senseamp =0;
2158

    
2159
  predeclength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2160

    
2161
  wordlinelength = data_width * 
2162
    (RegCellWidth + 
2163
     6 * ruu_issue_width * BitlineSpacing);
2164

    
2165
  bitlinelength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2166

    
2167
  if(verbose)
2168
    fprintf(stderr,"res station power stats\n");
2169
  power->rs_decoder = array_decoder_power(RUU_size,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2170
  power->rs_wordline = array_wordline_power(RUU_size,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2171
  power->rs_bitline = array_bitline_power(RUU_size,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2172
  /* no senseamps in reg file structures (only caches) */
2173
  power->rs_senseamp =0;
2174

    
2175
  /* addresses go into lsq tag's */
2176
  power->lsq_wakeup_tagdrive =cam_tagdrive(LSQ_size,data_width,res_memport,res_memport);
2177
  power->lsq_wakeup_tagmatch =cam_tagmatch(LSQ_size,data_width,res_memport,res_memport);
2178
  power->lsq_wakeup_ormatch =0; 
2179

    
2180
  wordlinelength = data_width * 
2181
    (RegCellWidth + 
2182
     4 * res_memport * BitlineSpacing);
2183

    
2184
  bitlinelength = RUU_size * (RegCellHeight + 4 * res_memport * WordlineSpacing);
2185

    
2186
  /* rs's hold data */
2187
  if(verbose)
2188
    fprintf(stderr,"lsq station power stats\n");
2189
  power->lsq_rs_decoder = array_decoder_power(LSQ_size,data_width,predeclength,res_memport,res_memport,cache);
2190
  power->lsq_rs_wordline = array_wordline_power(LSQ_size,data_width,wordlinelength,res_memport,res_memport,cache);
2191
  power->lsq_rs_bitline = array_bitline_power(LSQ_size,data_width,bitlinelength,res_memport,res_memport,cache);
2192
  power->lsq_rs_senseamp =0;
2193

    
2194
  power->resultbus = compute_resultbus_power();
2195

    
2196
  /* Load cache values into what cacti is expecting */
2197
  time_parameters.cache_size = btb_config[0] * (data_width/8) * btb_config[1]; /* C */
2198
  time_parameters.block_size = (data_width/8); /* B */
2199
  time_parameters.associativity = btb_config[1]; /* A */
2200
  time_parameters.number_of_sets = btb_config[0]; /* C/(B*A) */
2201

    
2202
  /* have Cacti compute optimal cache config */
2203
  calculate_time(&time_result,&time_parameters);
2204
  output_data(&time_result,&time_parameters);
2205

    
2206
  /* extract Cacti results */
2207
  ndwl=time_result.best_Ndwl;
2208
  ndbl=time_result.best_Ndbl;
2209
  nspd=time_result.best_Nspd;
2210
  ntwl=time_result.best_Ntwl;
2211
  ntbl=time_result.best_Ntbl;
2212
  ntspd=time_result.best_Ntspd;
2213
  c = time_parameters.cache_size;
2214
  b = time_parameters.block_size;
2215
  a = time_parameters.associativity; 
2216

    
2217
  cache=1;
2218

    
2219
  /* Figure out how many rows/cols there are now */
2220
  rowsb = c/(b*a*ndbl*nspd);
2221
  colsb = 8*b*a*nspd/ndwl;
2222

    
2223
  if(verbose) {
2224
    fprintf(stderr,"%d KB %d-way btb (%d-byte block size):\n",c,a,b);
2225
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2226
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2227
  }
2228

    
2229
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2230
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2231
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2232

    
2233
  if(verbose)
2234
    fprintf(stderr,"btb power stats\n");
2235
  power->btb = ndwl*ndbl*(array_decoder_power(rowsb,colsb,predeclength,1,1,cache) + array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache) + array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache) + senseamp_power(colsb));
2236

    
2237
  cache=1;
2238

    
2239
  scale_factor = squarify(twolev_config[0],twolev_config[2]);
2240
  predeclength = (twolev_config[0] / scale_factor)* (RegCellHeight + WordlineSpacing);
2241
  wordlinelength = twolev_config[2] * scale_factor *  (RegCellWidth + BitlineSpacing);
2242
  bitlinelength = (twolev_config[0] / scale_factor) * (RegCellHeight + WordlineSpacing);
2243

    
2244
  if(verbose)
2245
    fprintf(stderr,"local predict power stats\n");
2246

    
2247
  power->local_predict = array_decoder_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,bitlinelength,1,1,cache) + senseamp_power(twolev_config[2]*scale_factor);
2248

    
2249
  scale_factor = squarify(twolev_config[1],3);
2250

    
2251
  predeclength = (twolev_config[1] / scale_factor)* (RegCellHeight + WordlineSpacing);
2252
  wordlinelength = 3 * scale_factor *  (RegCellWidth + BitlineSpacing);
2253
  bitlinelength = (twolev_config[1] / scale_factor) * (RegCellHeight + WordlineSpacing);
2254

    
2255

    
2256
  if(verbose)
2257
    fprintf(stderr,"local predict power stats\n");
2258
  power->local_predict += array_decoder_power(twolev_config[1]/scale_factor,3*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[1]/scale_factor,3*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[1]/scale_factor,3*scale_factor,bitlinelength,1,1,cache) + senseamp_power(3*scale_factor);
2259

    
2260
  if(verbose)
2261
    fprintf(stderr,"bimod_config[0] == %d\n",bimod_config[0]);
2262

    
2263
  scale_factor = squarify(bimod_config[0],2);
2264

    
2265
  predeclength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2266
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2267
  bitlinelength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2268

    
2269

    
2270
  if(verbose)
2271
    fprintf(stderr,"global predict power stats\n");
2272
  power->global_predict = array_decoder_power(bimod_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(bimod_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(bimod_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2273

    
2274
  scale_factor = squarify(comb_config[0],2);
2275

    
2276
  predeclength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2277
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2278
  bitlinelength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2279

    
2280
  if(verbose)
2281
    fprintf(stderr,"chooser predict power stats\n");
2282
  power->chooser = array_decoder_power(comb_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(comb_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(comb_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2283

    
2284
  if(verbose)
2285
    fprintf(stderr,"RAS predict power stats\n");
2286
  power->ras = simple_array_power(ras_size,data_width,1,1,0);
2287

    
2288
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2289

    
2290
  if(verbose)
2291
    fprintf(stderr,"dtlb predict power stats\n");
2292
  power->dtlb = res_memport*(cam_array(dtlb->nsets, va_size - (int)logtwo((double)dtlb->bsize),1,1) + simple_array_power(dtlb->nsets,tagsize,1,1,cache));
2293

    
2294
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2295

    
2296
  predeclength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2297
  wordlinelength = logtwo((double)itlb->bsize) * (RegCellWidth + BitlineSpacing);
2298
  bitlinelength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2299

    
2300
  if(verbose)
2301
    fprintf(stderr,"itlb predict power stats\n");
2302
  power->itlb = cam_array(itlb->nsets, va_size - (int)logtwo((double)itlb->bsize),1,1) + simple_array_power(itlb->nsets,tagsize,1,1,cache);
2303

    
2304

    
2305
  cache=1;
2306

    
2307
  time_parameters.cache_size = cache_il1->nsets * cache_il1->bsize * cache_il1->assoc; /* C */
2308
  time_parameters.block_size = cache_il1->bsize; /* B */
2309
  time_parameters.associativity = cache_il1->assoc; /* A */
2310
  time_parameters.number_of_sets = cache_il1->nsets; /* C/(B*A) */
2311

    
2312
  calculate_time(&time_result,&time_parameters);
2313
  output_data(&time_result,&time_parameters);
2314

    
2315
  ndwl=time_result.best_Ndwl;
2316
  ndbl=time_result.best_Ndbl;
2317
  nspd=time_result.best_Nspd;
2318
  ntwl=time_result.best_Ntwl;
2319
  ntbl=time_result.best_Ntbl;
2320
  ntspd=time_result.best_Ntspd;
2321

    
2322
  c = time_parameters.cache_size;
2323
  b = time_parameters.block_size;
2324
  a = time_parameters.associativity;
2325

    
2326
  rowsb = c/(b*a*ndbl*nspd);
2327
  colsb = 8*b*a*nspd/ndwl;
2328

    
2329
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2330
  trowsb = c/(b*a*ntbl*ntspd);
2331
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2332
 
2333
  if(verbose) {
2334
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2335
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2336
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2337
    fprintf(stderr,"tagsize == %d\n",tagsize);
2338
  }
2339

    
2340
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2341
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2342
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2343

    
2344
  if(verbose)
2345
    fprintf(stderr,"icache power stats\n");
2346
  power->icache_decoder = ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2347
  power->icache_wordline = ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2348
  power->icache_bitline = ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2349
  power->icache_senseamp = ndwl*ndbl*senseamp_power(colsb);
2350
  power->icache_tagarray = ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2351

    
2352
  power->icache_power = power->icache_decoder + power->icache_wordline + power->icache_bitline + power->icache_senseamp + power->icache_tagarray;
2353

    
2354
  time_parameters.cache_size = cache_dl1->nsets * cache_dl1->bsize * cache_dl1->assoc; /* C */
2355
  time_parameters.block_size = cache_dl1->bsize; /* B */
2356
  time_parameters.associativity = cache_dl1->assoc; /* A */
2357
  time_parameters.number_of_sets = cache_dl1->nsets; /* C/(B*A) */
2358

    
2359
  calculate_time(&time_result,&time_parameters);
2360
  output_data(&time_result,&time_parameters);
2361

    
2362
  ndwl=time_result.best_Ndwl;
2363
  ndbl=time_result.best_Ndbl;
2364
  nspd=time_result.best_Nspd;
2365
  ntwl=time_result.best_Ntwl;
2366
  ntbl=time_result.best_Ntbl;
2367
  ntspd=time_result.best_Ntspd;
2368
  c = time_parameters.cache_size;
2369
  b = time_parameters.block_size;
2370
  a = time_parameters.associativity; 
2371

    
2372
  cache=1;
2373

    
2374
  rowsb = c/(b*a*ndbl*nspd);
2375
  colsb = 8*b*a*nspd/ndwl;
2376

    
2377
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2378
  trowsb = c/(b*a*ntbl*ntspd);
2379
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2380

    
2381
  if(verbose) {
2382
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2383
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2384
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2385
    fprintf(stderr,"tagsize == %d\n",tagsize);
2386

    
2387
    fprintf(stderr,"\nntwl == %d, ntbl == %d, ntspd == %d\n",ntwl,ntbl,ntspd);
2388
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ntwl*ntbl,trowsb,tcolsb);
2389
  }
2390

    
2391
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2392
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2393
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2394

    
2395
  if(verbose)
2396
    fprintf(stderr,"dcache power stats\n");
2397
  power->dcache_decoder = res_memport*ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2398
  power->dcache_wordline = res_memport*ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2399
  power->dcache_bitline = res_memport*ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2400
  power->dcache_senseamp = res_memport*ndwl*ndbl*senseamp_power(colsb);
2401
  power->dcache_tagarray = res_memport*ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2402

    
2403
  power->dcache_power = power->dcache_decoder + power->dcache_wordline + power->dcache_bitline + power->dcache_senseamp + power->dcache_tagarray;
2404

    
2405
  clockpower = total_clockpower(.018);
2406
  power->clock_power = clockpower;
2407
  if(verbose) {
2408
    fprintf(stderr,"result bus power == %f\n",power->resultbus);
2409
    fprintf(stderr,"global clock power == %f\n",clockpower);
2410
  }
2411

    
2412
  time_parameters.cache_size = cache_dl2->nsets * cache_dl2->bsize * cache_dl2->assoc; /* C */
2413
  time_parameters.block_size = cache_dl2->bsize; /* B */
2414
  time_parameters.associativity = cache_dl2->assoc; /* A */
2415
  time_parameters.number_of_sets = cache_dl2->nsets; /* C/(B*A) */
2416

    
2417
  calculate_time(&time_result,&time_parameters);
2418
  output_data(&time_result,&time_parameters);
2419

    
2420
  ndwl=time_result.best_Ndwl;
2421
  ndbl=time_result.best_Ndbl;
2422
  nspd=time_result.best_Nspd;
2423
  ntwl=time_result.best_Ntwl;
2424
  ntbl=time_result.best_Ntbl;
2425
  ntspd=time_result.best_Ntspd;
2426
  c = time_parameters.cache_size;
2427
  b = time_parameters.block_size;
2428
  a = time_parameters.associativity;
2429

    
2430
  rowsb = c/(b*a*ndbl*nspd);
2431
  colsb = 8*b*a*nspd/ndwl;
2432

    
2433
  tagsize = va_size - ((int)logtwo(cache_dl2->nsets) + (int)logtwo(cache_dl2->bsize));
2434
  trowsb = c/(b*a*ntbl*ntspd);
2435
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2436

    
2437
  if(verbose) {
2438
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2439
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2440
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2441
    fprintf(stderr,"tagsize == %d\n",tagsize);
2442
  }
2443

    
2444
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2445
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2446
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2447

    
2448
  if(verbose)
2449
    fprintf(stderr,"dcache2 power stats\n");
2450
  power->dcache2_decoder = array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2451
  power->dcache2_wordline = array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2452
  power->dcache2_bitline = array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2453
  power->dcache2_senseamp = senseamp_power(colsb);
2454
  power->dcache2_tagarray = simple_array_power(trowsb,tcolsb,1,1,cache);
2455

    
2456
  power->dcache2_power = power->dcache2_decoder + power->dcache2_wordline + power->dcache2_bitline + power->dcache2_senseamp + power->dcache2_tagarray;
2457

    
2458
  power->rat_decoder *= crossover_scaling;
2459
  power->rat_wordline *= crossover_scaling;
2460
  power->rat_bitline *= crossover_scaling;
2461

    
2462
  power->dcl_compare *= crossover_scaling;
2463
  power->dcl_pencode *= crossover_scaling;
2464
  power->inst_decoder_power *= crossover_scaling;
2465
  power->wakeup_tagdrive *= crossover_scaling;
2466
  power->wakeup_tagmatch *= crossover_scaling;
2467
  power->wakeup_ormatch *= crossover_scaling;
2468

    
2469
  power->selection *= crossover_scaling;
2470

    
2471
  power->regfile_decoder *= crossover_scaling;
2472
  power->regfile_wordline *= crossover_scaling;
2473
  power->regfile_bitline *= crossover_scaling;
2474
  power->regfile_senseamp *= crossover_scaling;
2475

    
2476
  power->rs_decoder *= crossover_scaling;
2477
  power->rs_wordline *= crossover_scaling;
2478
  power->rs_bitline *= crossover_scaling;
2479
  power->rs_senseamp *= crossover_scaling;
2480

    
2481
  power->lsq_wakeup_tagdrive *= crossover_scaling;
2482
  power->lsq_wakeup_tagmatch *= crossover_scaling;
2483

    
2484
  power->lsq_rs_decoder *= crossover_scaling;
2485
  power->lsq_rs_wordline *= crossover_scaling;
2486
  power->lsq_rs_bitline *= crossover_scaling;
2487
  power->lsq_rs_senseamp *= crossover_scaling;
2488
 
2489
  power->resultbus *= crossover_scaling;
2490

    
2491
  power->btb *= crossover_scaling;
2492
  power->local_predict *= crossover_scaling;
2493
  power->global_predict *= crossover_scaling;
2494
  power->chooser *= crossover_scaling;
2495

    
2496
  power->dtlb *= crossover_scaling;
2497

    
2498
  power->itlb *= crossover_scaling;
2499

    
2500
  power->icache_decoder *= crossover_scaling;
2501
  power->icache_wordline*= crossover_scaling;
2502
  power->icache_bitline *= crossover_scaling;
2503
  power->icache_senseamp*= crossover_scaling;
2504
  power->icache_tagarray*= crossover_scaling;
2505

    
2506
  power->icache_power *= crossover_scaling;
2507

    
2508
  power->dcache_decoder *= crossover_scaling;
2509
  power->dcache_wordline *= crossover_scaling;
2510
  power->dcache_bitline *= crossover_scaling;
2511
  power->dcache_senseamp *= crossover_scaling;
2512
  power->dcache_tagarray *= crossover_scaling;
2513

    
2514
  power->dcache_power *= crossover_scaling;
2515
  
2516
  power->clock_power *= crossover_scaling;
2517

    
2518
  power->dcache2_decoder *= crossover_scaling;
2519
  power->dcache2_wordline *= crossover_scaling;
2520
  power->dcache2_bitline *= crossover_scaling;
2521
  power->dcache2_senseamp *= crossover_scaling;
2522
  power->dcache2_tagarray *= crossover_scaling;
2523

    
2524
  power->dcache2_power *= crossover_scaling;
2525

    
2526
  power->total_power = power->local_predict + power->global_predict + 
2527
    power->chooser + power->btb +
2528
    power->rat_decoder + power->rat_wordline + 
2529
    power->rat_bitline + power->rat_senseamp + 
2530
    power->dcl_compare + power->dcl_pencode + 
2531
    power->inst_decoder_power +
2532
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2533
    power->selection +
2534
    power->regfile_decoder + power->regfile_wordline + 
2535
    power->regfile_bitline + power->regfile_senseamp +  
2536
    power->rs_decoder + power->rs_wordline +
2537
    power->rs_bitline + power->rs_senseamp + 
2538
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2539
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2540
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2541
    power->resultbus +
2542
    power->clock_power +
2543
    power->icache_power + 
2544
    power->itlb + 
2545
    power->dcache_power + 
2546
    power->dtlb + 
2547
    power->dcache2_power;
2548

    
2549
  power->total_power_nodcache2 =power->local_predict + power->global_predict + 
2550
    power->chooser + power->btb +
2551
    power->rat_decoder + power->rat_wordline + 
2552
    power->rat_bitline + power->rat_senseamp + 
2553
    power->dcl_compare + power->dcl_pencode + 
2554
    power->inst_decoder_power +
2555
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2556
    power->selection +
2557
    power->regfile_decoder + power->regfile_wordline + 
2558
    power->regfile_bitline + power->regfile_senseamp +  
2559
    power->rs_decoder + power->rs_wordline +
2560
    power->rs_bitline + power->rs_senseamp + 
2561
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2562
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2563
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2564
    power->resultbus +
2565
    power->clock_power +
2566
    power->icache_power + 
2567
    power->itlb + 
2568
    power->dcache_power + 
2569
    power->dtlb + 
2570
    power->dcache2_power;
2571

    
2572
  power->bpred_power = power->btb + power->local_predict + power->global_predict + power->chooser + power->ras;
2573

    
2574
  power->rat_power = power->rat_decoder + 
2575
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
2576

    
2577
  power->dcl_power = power->dcl_compare + power->dcl_pencode;
2578

    
2579
  power->rename_power = power->rat_power + 
2580
    power->dcl_power + 
2581
    power->inst_decoder_power;
2582

    
2583
  power->wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
2584
    power->wakeup_ormatch;
2585

    
2586
  power->rs_power = power->rs_decoder + 
2587
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
2588

    
2589
  power->rs_power_nobit = power->rs_decoder + 
2590
    power->rs_wordline + power->rs_senseamp;
2591

    
2592
  power->window_power = power->wakeup_power + power->rs_power + 
2593
    power->selection;
2594

    
2595
  power->lsq_rs_power = power->lsq_rs_decoder + 
2596
    power->lsq_rs_wordline + power->lsq_rs_bitline + 
2597
    power->lsq_rs_senseamp;
2598

    
2599
  power->lsq_rs_power_nobit = power->lsq_rs_decoder + 
2600
    power->lsq_rs_wordline + power->lsq_rs_senseamp;
2601
   
2602
  power->lsq_wakeup_power = power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch;
2603

    
2604
  power->lsq_power = power->lsq_wakeup_power + power->lsq_rs_power;
2605

    
2606
  power->regfile_power = power->regfile_decoder + 
2607
    power->regfile_wordline + power->regfile_bitline + 
2608
    power->regfile_senseamp;
2609

    
2610
  power->regfile_power_nobit = power->regfile_decoder + 
2611
    power->regfile_wordline + power->regfile_senseamp;
2612

    
2613
  dump_power_stats(power);
2614

    
2615
}