Statistics
| Revision:

root / wattch / power.c @ 54

History | View | Annotate | Download (103 KB)

1
/* I included this copyright since we're using Cacti for some stuff */
2

    
3
/*------------------------------------------------------------
4
 *  Copyright 1994 Digital Equipment Corporation and Steve Wilton
5
 *                         All Rights Reserved
6
 *
7
 * Permission to use, copy, and modify this software and its documentation is
8
 * hereby granted only under the following terms and conditions.  Both the
9
 * above copyright notice and this permission notice must appear in all copies
10
 * of the software, derivative works or modified versions, and any portions
11
 * thereof, and both notices must appear in supporting documentation.
12
 *
13
 * Users of this software agree to the terms and conditions set forth herein,
14
 * and hereby grant back to Digital a non-exclusive, unrestricted, royalty-
15
 * free right and license under any changes, enhancements or extensions
16
 * made to the core functions of the software, including but not limited to
17
 * those affording compatibility with other hardware or software
18
 * environments, but excluding applications which incorporate this software.
19
 * Users further agree to use their best efforts to return to Digital any
20
 * such changes, enhancements or extensions that they make and inform Digital
21
 * of noteworthy uses of this software.  Correspondence should be provided
22
 * to Digital at:
23
 *
24
 *                       Director of Licensing
25
 *                       Western Research Laboratory
26
 *                       Digital Equipment Corporation
27
 *                       100 Hamilton Avenue
28
 *                       Palo Alto, California  94301
29
 *
30
 * This software may be distributed (but not offered for sale or transferred
31
 * for compensation) to third parties, provided such third parties agree to
32
 * abide by the terms and conditions of this notice.
33
 *
34
 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
35
 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
36
 * OF MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
37
 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
38
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
39
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
41
 * SOFTWARE.
42
 *------------------------------------------------------------*/
43

    
44
#include <math.h>
45
#include "power.h"
46
#include "machine.h"
47
#include "cache.h"
48
#include "sim.h"
49
#include <assert.h>
50

    
51
//#define SensePowerfactor (Mhz)*(Vdd/2)*(Vdd/2)
52
//#define Sense2Powerfactor (Mhz)*(2*.3+.1*Vdd)
53
//#define Powerfactor (Mhz)*Vdd*Vdd
54
//#define LowSwingPowerfactor (Mhz)*.2*.2
55
/* set scale for crossover (vdd->gnd) currents */
56
double crossover_scaling = 1.2;
57
/* set non-ideal turnoff percentage */
58
double turnoff_factor = 0.1;
59

    
60
#define MSCALE (LSCALE * .624 / .2250)
61

    
62
/*----------------------------------------------------------------------*/
63

    
64
/* static power model results */
65
power_result_type power;
66

    
67
/*
 * Return 2 raised to the power x as an int.
 *
 * Computed with an integer shift so the result is exact and does not depend
 * on the math library's rounding.
 *   x < 0            -> 0 (what truncating 2^x toward zero gives)
 *   x too large      -> largest representable int (the floating-point
 *                       version had undefined behaviour in this case)
 */
int pow2(int x) {
  /* highest exponent whose power of two still fits in a signed int;
     assumes 8-bit bytes */
  const int max_exp = (int)(sizeof(int) * 8) - 2;

  if (x < 0)
    return 0;
  if (x > max_exp)
    return (int)(~0u >> 1);
  return (int)(1u << x);
}
70

    
71
double logfour(x)
72
     double x;
73
{
74
  if (x<=0) fprintf(stderr,"%e\n",x);
75
  return( (double) (log(x)/log(4.0)) );
76
}
77

    
78
/* safer pop count to validate the fast algorithm */
79
int pop_count_slow(bquad_t bits)
80
{
81
  int count = 0; 
82
  bquad_t tmpbits = bits; 
83
  while (tmpbits) { 
84
    if (tmpbits & 1) ++count; 
85
    tmpbits >>= 1; 
86
  } 
87
  return count; 
88
}
89

    
90
/* fast pop count */
91
int pop_count(bquad_t bits)
92
{
93
#define T unsigned long long
94
#define ONES ((T)(-1)) 
95
#define TWO(k) ((T)1 << (k)) 
96
#define CYCL(k) (ONES/(1 + (TWO(TWO(k))))) 
97
#define BSUM(x,k) ((x)+=(x) >> TWO(k), (x) &= CYCL(k)) 
98
  bquad_t x = bits; 
99
  x = (x & CYCL(0)) + ((x>>TWO(0)) & CYCL(0)); 
100
  x = (x & CYCL(1)) + ((x>>TWO(1)) & CYCL(1)); 
101
  BSUM(x,2); 
102
  BSUM(x,3); 
103
  BSUM(x,4); 
104
  BSUM(x,5); 
105
  return x; 
106
}
107

    
108

    
109
int opcode_length = 8;
110
int inst_length = 32;
111

    
112
extern int ruu_decode_width;
113
extern int ruu_issue_width;
114
extern int ruu_commit_width;
115
extern int RUU_size;
116
extern int LSQ_size;
117
extern int data_width;
118
extern int res_ialu;
119
extern int res_fpalu;
120
extern int res_memport;
121

    
122
int nvreg_width;
123
int npreg_width;
124

    
125
extern int bimod_config[];
126

    
127
extern struct cache_t *cache_dl1;
128
extern struct cache_t *cache_il1;
129
extern struct cache_t *cache_dl2;
130

    
131
extern struct cache_t *dtlb;
132
extern struct cache_t *itlb;
133

    
134
/* 2-level predictor config (<l1size> <l2size> <hist_size> <xor>) */
135
extern int twolev_config[];
136

    
137
/* combining predictor config (<meta_table_size>) */
138
extern int comb_config[];
139

    
140
/* return address stack (RAS) size */
141
extern int ras_size;
142

    
143
/* BTB predictor config (<num_sets> <associativity>) */
144
extern int btb_config[];
145

    
146
double global_clockcap;
147

    
148
static double rename_power=0;
149
static double bpred_power=0;
150
static double window_power=0;
151
static double lsq_power=0;
152
static double regfile_power=0;
153
static double icache_power=0;
154
static double dcache_power=0;
155
static double dcache2_power=0;
156
static double alu_power=0;
157
static double falu_power=0;
158
static double resultbus_power=0;
159
static double clock_power=0;
160

    
161
static double rename_power_cc1=0;
162
static double bpred_power_cc1=0;
163
static double window_power_cc1=0;
164
static double lsq_power_cc1=0;
165
static double regfile_power_cc1=0;
166
static double icache_power_cc1=0;
167
static double dcache_power_cc1=0;
168
static double dcache2_power_cc1=0;
169
static double alu_power_cc1=0;
170
static double resultbus_power_cc1=0;
171
static double clock_power_cc1=0;
172

    
173
static double rename_power_cc2=0;
174
static double bpred_power_cc2=0;
175
static double window_power_cc2=0;
176
static double lsq_power_cc2=0;
177
static double regfile_power_cc2=0;
178
static double icache_power_cc2=0;
179
static double dcache_power_cc2=0;
180
static double dcache2_power_cc2=0;
181
static double alu_power_cc2=0;
182
static double resultbus_power_cc2=0;
183
static double clock_power_cc2=0;
184

    
185
static double rename_power_cc3=0;
186
static double bpred_power_cc3=0;
187
static double window_power_cc3=0;
188
static double lsq_power_cc3=0;
189
static double regfile_power_cc3=0;
190
static double icache_power_cc3=0;
191
static double dcache_power_cc3=0;
192
static double dcache2_power_cc3=0;
193
static double alu_power_cc3=0;
194
static double resultbus_power_cc3=0;
195
static double clock_power_cc3=0;
196

    
197
static double total_cycle_power;
198
static double total_cycle_power_cc1;
199
static double total_cycle_power_cc2;
200
static double total_cycle_power_cc3;
201

    
202
/*
 * Power-delivery (parasitic) loss accumulators, one set per conditional
 * clocking style (cc1/cc2/cc3).  update_power_stats() adds to these on
 * every call.
 */
static double total_parasitic_cc1 = 0.0,
              total_parasitic_cc2 = 0.0,
              total_parasitic_cc3 = 0.0;
static double offchip_parasitic_cc1 = 0.0,
              offchip_parasitic_cc2 = 0.0,
              offchip_parasitic_cc3 = 0.0;
static double onchip_parasitic_cc1 = 0.0,
              onchip_parasitic_cc2 = 0.0,
              onchip_parasitic_cc3 = 0.0;

/* multiplied by current^2 to form the resistive part of the off-chip loss
   (presumably a resistance in ohms, going by the name -- confirm) */
#define PARASITIC_OHM 0.002

/* largest / smallest per-cycle current seen so far; min_amp starts high so
   that the first sample replaces it */
static double max_amp = 0.00;
static double min_amp = 1000.00;

/*
 * Off-chip loss lookup table.  update_power_stats() selects entry i when the
 * per-cycle current lies in [0.5*i, 0.5*(i+1)) amps, and the final entry for
 * any current of 13 A or more.  Each row below therefore covers one amp.
 */
static double offchip_ploss[] = {
  0.5, 0.5,       /* [ 0,  1) A */
  0.5, 0.5,       /* [ 1,  2) A */
  0.5, 0.5,       /* [ 2,  3) A */
  0.6, 0.7,       /* [ 3,  4) A */
  0.8, 0.9,       /* [ 4,  5) A */
  1.0, 1.1,       /* [ 5,  6) A */
  1.2, 1.3,       /* [ 6,  7) A */
  1.5, 1.6,       /* [ 7,  8) A */
  1.8, 2.0,       /* [ 8,  9) A */
  2.2, 2.4,       /* [ 9, 10) A */
  2.6, 2.8,       /* [10, 11) A */
  3.0, 3.3,       /* [11, 12) A */
  3.6, 3.9,       /* [12, 13) A */
  4.0             /* 13 A and above */
};

/* cumulative cc totals as of the previous call, and the per-cycle
   difference derived from them on the current call */
static double last_single_total_cycle_power_cc1 = 0.0,
              last_single_total_cycle_power_cc2 = 0.0,
              last_single_total_cycle_power_cc3 = 0.0;
static double current_total_cycle_power_cc1,
              current_total_cycle_power_cc2,
              current_total_cycle_power_cc3;

/*
 * Voltage/frequency switching state used by update_power_stats().
 */

/* instruction counters as of the previous call; the per-call differences
   are stored in diff_commit (from sim_num_insn) and diff_dispatch (from
   sim_total_insn) */
static double last_sim_num_insn = 0;
static double last_sim_total_insn = 0;
static double diff_dispatch = 0;
static double diff_commit = 0;

/* 1 = full speed; 0 = reduced speed (Mhz and Vdd are halved on entry) */
static int speed_grade = 1;
static int last_speed_grade = 1;

/* running sums of the per-call differences over the history window */
static double diff_dispatch_sum = 0;
static double diff_commit_sum = 0;

/* number of calls seen while the history window is still filling */
static int init_count = 0;

/* uncomment to enable the extra forced speed-up branch in
   update_power_stats() */
//#define DVFS_FIX

/* history window length, in calls; longer time = more power consumed */
#define SUM_OVER 50000
static double hist_dispatch[SUM_OVER];
static double hist_commit[SUM_OVER];
static int hist_idx = 0;        /* next slot to write in the two rings */

/* calls spent at each speed grade, and bookkeeping for the last switch */
static double slow_cycles = 0;
static double fast_cycles = 0;
static double last_switch_time = 0;
static double cycle_count = 0;

/* queue of speed-grade decisions: the newest decision is written to
   element 0 and the grade to apply is read from the last element */
#define SWITCH_CYCLES 30
static int speed_delay[SWITCH_CYCLES];

/* on-chip regulator loss added per call: LOW while speed_grade is 0,
   HIGH otherwise */
#define ONCHIP_VREG_LOSS_LOW 0.220
#define ONCHIP_VREG_LOSS_HIGH 0.120

/* largest single-call power seen so far under each clocking style */
static double max_cycle_power_cc1 = 0.0,
              max_cycle_power_cc2 = 0.0,
              max_cycle_power_cc3 = 0.0;
261

    
262
extern counter_t rename_access;
263
extern counter_t bpred_access;
264
extern counter_t window_access;
265
extern counter_t lsq_access;
266
extern counter_t regfile_access;
267
extern counter_t icache_access;
268
extern counter_t dcache_access;
269
extern counter_t dcache2_access;
270
extern counter_t alu_access;
271
extern counter_t ialu_access;
272
extern counter_t falu_access;
273
extern counter_t resultbus_access;
274

    
275
extern counter_t window_selection_access;
276
extern counter_t window_wakeup_access;
277
extern counter_t window_preg_access;
278
extern counter_t lsq_preg_access;
279
extern counter_t lsq_wakeup_access;
280
extern counter_t lsq_store_data_access;
281
extern counter_t lsq_load_data_access;
282

    
283
extern counter_t window_total_pop_count_cycle;
284
extern counter_t window_num_pop_count_cycle;
285
extern counter_t lsq_total_pop_count_cycle;
286
extern counter_t lsq_num_pop_count_cycle;
287
extern counter_t regfile_total_pop_count_cycle;
288
extern counter_t regfile_num_pop_count_cycle;
289
extern counter_t resultbus_total_pop_count_cycle;
290
extern counter_t resultbus_num_pop_count_cycle;
291

    
292
static counter_t total_rename_access=0;
293
static counter_t total_bpred_access=0;
294
static counter_t total_window_access=0;
295
static counter_t total_lsq_access=0;
296
static counter_t total_regfile_access=0;
297
static counter_t total_icache_access=0;
298
static counter_t total_dcache_access=0;
299
static counter_t total_dcache2_access=0;
300
static counter_t total_alu_access=0;
301
static counter_t total_resultbus_access=0;
302

    
303
static counter_t max_rename_access;
304
static counter_t max_bpred_access;
305
static counter_t max_window_access;
306
static counter_t max_lsq_access;
307
static counter_t max_regfile_access;
308
static counter_t max_icache_access;
309
static counter_t max_dcache_access;
310
static counter_t max_dcache2_access;
311
static counter_t max_alu_access;
312
static counter_t max_resultbus_access;
313

    
314
void clear_access_stats()
315
{
316
  rename_access=0;
317
  bpred_access=0;
318
  window_access=0;
319
  lsq_access=0;
320
  regfile_access=0;
321
  icache_access=0;
322
  dcache_access=0;
323
  dcache2_access=0;
324
  alu_access=0;
325
  ialu_access=0;
326
  falu_access=0;
327
  resultbus_access=0;
328

    
329
  window_preg_access=0;
330
  window_selection_access=0;
331
  window_wakeup_access=0;
332
  lsq_store_data_access=0;
333
  lsq_load_data_access=0;
334
  lsq_wakeup_access=0;
335
  lsq_preg_access=0;
336

    
337
  window_total_pop_count_cycle=0;
338
  window_num_pop_count_cycle=0;
339
  lsq_total_pop_count_cycle=0;
340
  lsq_num_pop_count_cycle=0;
341
  regfile_total_pop_count_cycle=0;
342
  regfile_num_pop_count_cycle=0;
343
  resultbus_total_pop_count_cycle=0;
344
  resultbus_num_pop_count_cycle=0;
345
}
346

    
347
/* compute bitline activity factors which we use to scale bitline power 
348
   Here it is very important whether we assume 0's or 1's are
349
   responsible for dissipating power in pre-charged stuctures. (since
350
   most of the bits are 0's, we assume the design is power-efficient
351
   enough to allow 0's to _not_ discharge 
352
*/
353
double compute_af(counter_t num_pop_count_cycle,counter_t total_pop_count_cycle,int pop_width) {
354
  double avg_pop_count;
355
  double af,af_b;
356

    
357
  if(num_pop_count_cycle)
358
    avg_pop_count = (double)total_pop_count_cycle / (double)num_pop_count_cycle;
359
  else
360
    avg_pop_count = 0;
361

    
362
  af = avg_pop_count / (double)pop_width;
363
  
364
  af_b = 1.0 - af;
365

    
366
  /*  printf("af == %f%%, af_b == %f%%, total_pop == %d, num_pop == %d\n",100*af,100*af_b,total_pop_count_cycle,num_pop_count_cycle); */
367

    
368
  return(af_b);
369
}
370

    
371
/* compute power statistics on each cycle, for each conditional clocking style.  Obviously
372
most of the speed penalty comes here, so if you don't want per-cycle power estimates
373
you could post-process 
374

375
See README.wattch for details on the various clock gating styles.
376

377
*/
378
void update_power_stats()
379
{
380
  double window_af_b, lsq_af_b, regfile_af_b, resultbus_af_b;
381
  double current;
382
  int speed_idx;
383

    
384
#ifdef DYNAMIC_AF
385
  window_af_b = compute_af(window_num_pop_count_cycle,window_total_pop_count_cycle,data_width);
386
  lsq_af_b = compute_af(lsq_num_pop_count_cycle,lsq_total_pop_count_cycle,data_width);
387
  regfile_af_b = compute_af(regfile_num_pop_count_cycle,regfile_total_pop_count_cycle,data_width);
388
  resultbus_af_b = compute_af(resultbus_num_pop_count_cycle,resultbus_total_pop_count_cycle,data_width);
389
#endif
390
  
391
  rename_power+=power.rename_power;
392
  bpred_power+=power.bpred_power;
393
  window_power+=power.window_power;
394
  lsq_power+=power.lsq_power;
395
  regfile_power+=power.regfile_power;
396
  icache_power+=power.icache_power+power.itlb;
397
  dcache_power+=power.dcache_power+power.dtlb;
398
  dcache2_power+=power.dcache2_power;
399
  alu_power+=power.ialu_power + power.falu_power;
400
  falu_power+=power.falu_power;
401
  resultbus_power+=power.resultbus;
402
  clock_power+=power.clock_power;
403

    
404
  total_rename_access+=rename_access;
405
  total_bpred_access+=bpred_access;
406
  total_window_access+=window_access;
407
  total_lsq_access+=lsq_access;
408
  total_regfile_access+=regfile_access;
409
  total_icache_access+=icache_access;
410
  total_dcache_access+=dcache_access;
411
  total_dcache2_access+=dcache2_access;
412
  total_alu_access+=alu_access;
413
  total_resultbus_access+=resultbus_access;
414

    
415
  max_rename_access=MAX(rename_access,max_rename_access);
416
  max_bpred_access=MAX(bpred_access,max_bpred_access);
417
  max_window_access=MAX(window_access,max_window_access);
418
  max_lsq_access=MAX(lsq_access,max_lsq_access);
419
  max_regfile_access=MAX(regfile_access,max_regfile_access);
420
  max_icache_access=MAX(icache_access,max_icache_access);
421
  max_dcache_access=MAX(dcache_access,max_dcache_access);
422
  max_dcache2_access=MAX(dcache2_access,max_dcache2_access);
423
  max_alu_access=MAX(alu_access,max_alu_access);
424
  max_resultbus_access=MAX(resultbus_access,max_resultbus_access);
425
      
426
  if(rename_access) {
427
    rename_power_cc1+=power.rename_power;
428
    rename_power_cc2+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
429
    rename_power_cc3+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
430
  }
431
  else 
432
    rename_power_cc3+=turnoff_factor*power.rename_power;
433

    
434
  if(bpred_access) {
435
    if(bpred_access <= 2)
436
      bpred_power_cc1+=power.bpred_power;
437
    else
438
      bpred_power_cc1+=((double)bpred_access/2.0) * power.bpred_power;
439
    bpred_power_cc2+=((double)bpred_access/2.0) * power.bpred_power;
440
    bpred_power_cc3+=((double)bpred_access/2.0) * power.bpred_power;
441
  }
442
  else
443
    bpred_power_cc3+=turnoff_factor*power.bpred_power;
444

    
445
#ifdef STATIC_AF
446
  if(window_preg_access) {
447
    if(window_preg_access <= 3*ruu_issue_width)
448
      window_power_cc1+=power.rs_power;
449
    else
450
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
451
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
452
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
453
  }
454
  else
455
    window_power_cc3+=turnoff_factor*power.rs_power;
456
#elif defined(DYNAMIC_AF)
457
  if(window_preg_access) {
458
    if(window_preg_access <= 3*ruu_issue_width)
459
      window_power_cc1+=power.rs_power_nobit + window_af_b*power.rs_bitline;
460
    else
461
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
462
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
463
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
464
  }
465
  else
466
    window_power_cc3+=turnoff_factor*power.rs_power;
467
#else
468
  panic("no AF-style defined\n");
469
#endif
470

    
471
  if(window_selection_access) {
472
    if(window_selection_access <= ruu_issue_width)
473
      window_power_cc1+=power.selection;
474
    else
475
      window_power_cc1+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
476
    window_power_cc2+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
477
    window_power_cc3+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
478
  }
479
  else
480
    window_power_cc3+=turnoff_factor*power.selection;
481

    
482
  if(window_wakeup_access) {
483
    if(window_wakeup_access <= ruu_issue_width)
484
      window_power_cc1+=power.wakeup_power;
485
    else
486
      window_power_cc1+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
487
    window_power_cc2+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
488
    window_power_cc3+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
489
  }
490
  else
491
    window_power_cc3+=turnoff_factor*power.wakeup_power;
492

    
493
  if(lsq_wakeup_access) {
494
    if(lsq_wakeup_access <= res_memport)
495
      lsq_power_cc1+=power.lsq_wakeup_power;
496
    else
497
      lsq_power_cc1+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
498
    lsq_power_cc2+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
499
    lsq_power_cc3+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
500
  }
501
  else
502
    lsq_power_cc3+=turnoff_factor*power.lsq_wakeup_power;
503

    
504
#ifdef STATIC_AF
505
  if(lsq_preg_access) {
506
    if(lsq_preg_access <= res_memport)
507
      lsq_power_cc1+=power.lsq_rs_power;
508
    else
509
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
510
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
511
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
512
  }
513
  else
514
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
515
#else
516
  if(lsq_preg_access) {
517
    if(lsq_preg_access <= res_memport)
518
      lsq_power_cc1+=power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline;
519
    else
520
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
521
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
522
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
523
  }
524
  else
525
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
526
#endif
527

    
528
#ifdef STATIC_AF
529
  if(regfile_access) {
530
    if(regfile_access <= (3.0*ruu_commit_width))
531
      regfile_power_cc1+=power.regfile_power;
532
    else
533
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
534
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
535
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
536
  }
537
  else
538
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
539
#else
540
  if(regfile_access) {
541
    if(regfile_access <= (3.0*ruu_commit_width))
542
      regfile_power_cc1+=power.regfile_power_nobit + regfile_af_b*power.regfile_bitline;
543
    else
544
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
545
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
546
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
547
  }
548
  else
549
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
550
#endif
551

    
552
  if(icache_access) {
553
    /* don't scale icache because we assume 1 line is fetched, unless fetch stalls */
554
    icache_power_cc1+=power.icache_power+power.itlb;
555
    icache_power_cc2+=power.icache_power+power.itlb;
556
    icache_power_cc3+=power.icache_power+power.itlb;
557
  }
558
  else
559
    icache_power_cc3+=turnoff_factor*(power.icache_power+power.itlb);
560

    
561
  if(dcache_access) {
562
    if(dcache_access <= res_memport)
563
      dcache_power_cc1+=power.dcache_power+power.dtlb;
564
    else
565
      dcache_power_cc1+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
566
                                                     power.dtlb);
567
    dcache_power_cc2+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
568
                                                   power.dtlb);
569
    dcache_power_cc3+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
570
                                                   power.dtlb);
571
  }
572
  else
573
    dcache_power_cc3+=turnoff_factor*(power.dcache_power+power.dtlb);
574

    
575
  if(dcache2_access) {
576
    if(dcache2_access <= res_memport)
577
      dcache2_power_cc1+=power.dcache2_power;
578
    else
579
      dcache2_power_cc1+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
580
    dcache2_power_cc2+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
581
    dcache2_power_cc3+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
582
  }
583
  else
584
    dcache2_power_cc3+=turnoff_factor*power.dcache2_power;
585

    
586
  if(alu_access) {
587
    if(ialu_access)
588
      alu_power_cc1+=power.ialu_power;
589
    else
590
      alu_power_cc3+=turnoff_factor*power.ialu_power;
591
    if(falu_access)
592
      alu_power_cc1+=power.falu_power;
593
    else
594
      alu_power_cc3+=turnoff_factor*power.falu_power;
595

    
596
    alu_power_cc2+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
597
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
598
    alu_power_cc3+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
599
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
600
  }
601
  else
602
    alu_power_cc3+=turnoff_factor*(power.ialu_power + power.falu_power);
603

    
604
#ifdef STATIC_AF
605
  if(resultbus_access) {
606
    assert(ruu_issue_width != 0);
607
    if(resultbus_access <= ruu_issue_width) {
608
      resultbus_power_cc1+=power.resultbus;
609
    }
610
    else {
611
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
612
    }
613
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
614
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
615
  }
616
  else
617
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
618
#else
619
  if(resultbus_access) {
620
    assert(ruu_issue_width != 0);
621
    if(resultbus_access <= ruu_issue_width) {
622
      resultbus_power_cc1+=resultbus_af_b*power.resultbus;
623
    }
624
    else {
625
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
626
    }
627
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
628
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
629
  }
630
  else
631
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
632
#endif
633

    
634
  total_cycle_power = rename_power + bpred_power + window_power + 
635
    lsq_power + regfile_power + icache_power + dcache_power +
636
    alu_power + resultbus_power;
637

    
638
  total_cycle_power_cc1 = rename_power_cc1 + bpred_power_cc1 + 
639
    window_power_cc1 + lsq_power_cc1 + regfile_power_cc1 + 
640
    icache_power_cc1 + dcache_power_cc1 + alu_power_cc1 + 
641
    resultbus_power_cc1;
642

    
643
  total_cycle_power_cc2 = rename_power_cc2 + bpred_power_cc2 + 
644
    window_power_cc2 + lsq_power_cc2 + regfile_power_cc2 + 
645
    icache_power_cc2 + dcache_power_cc2 + alu_power_cc2 + 
646
    resultbus_power_cc2;
647

    
648
  total_cycle_power_cc3 = rename_power_cc3 + bpred_power_cc3 + 
649
    window_power_cc3 + lsq_power_cc3 + regfile_power_cc3 + 
650
    icache_power_cc3 + dcache_power_cc3 + alu_power_cc3 + 
651
    resultbus_power_cc3;
652

    
653
  clock_power_cc1+=power.clock_power*(total_cycle_power_cc1/total_cycle_power);
654
  clock_power_cc2+=power.clock_power*(total_cycle_power_cc2/total_cycle_power);
655
  clock_power_cc3+=power.clock_power*(total_cycle_power_cc3/total_cycle_power);
656

    
657
  total_cycle_power_cc1 += clock_power_cc1;
658
  total_cycle_power_cc2 += clock_power_cc2;
659
  total_cycle_power_cc3 += clock_power_cc3;
660

    
661
  current_total_cycle_power_cc1 = total_cycle_power_cc1
662
    -last_single_total_cycle_power_cc1;
663
  current_total_cycle_power_cc2 = total_cycle_power_cc2
664
    -last_single_total_cycle_power_cc2;
665
  current_total_cycle_power_cc3 = total_cycle_power_cc3
666
    -last_single_total_cycle_power_cc3;
667

    
668
   current = current_total_cycle_power_cc3 / Vdd;
669

    
670
  if (max_amp < current ) {
671
      max_amp = current ;
672
  }
673

    
674
  if (min_amp > current) {
675
      min_amp = current;
676
  }
677

    
678
  if (current < 0.5) {
679
      offchip_parasitic_cc1 += offchip_ploss[0];
680
      offchip_parasitic_cc2 += offchip_ploss[0];
681
      offchip_parasitic_cc3 += offchip_ploss[0];
682
  } else if (current < 1) {
683
      offchip_parasitic_cc1 += offchip_ploss[1];
684
      offchip_parasitic_cc2 += offchip_ploss[1];
685
      offchip_parasitic_cc3 += offchip_ploss[1];
686
  } else if (current < 1.5) {
687
      offchip_parasitic_cc1 += offchip_ploss[2];
688
      offchip_parasitic_cc2 += offchip_ploss[2];
689
      offchip_parasitic_cc3 += offchip_ploss[2];
690
  } else if (current < 2) {
691
      offchip_parasitic_cc1 += offchip_ploss[3];
692
      offchip_parasitic_cc2 += offchip_ploss[3];
693
      offchip_parasitic_cc3 += offchip_ploss[3];
694
  } else if (current < 2.5) {
695
      offchip_parasitic_cc1 += offchip_ploss[4];
696
      offchip_parasitic_cc2 += offchip_ploss[4];
697
      offchip_parasitic_cc3 += offchip_ploss[4];
698
  } else if (current < 3) {
699
      offchip_parasitic_cc1 += offchip_ploss[5];
700
      offchip_parasitic_cc2 += offchip_ploss[5];
701
      offchip_parasitic_cc3 += offchip_ploss[5];
702
  } else if (current < 3.5) {
703
      offchip_parasitic_cc1 += offchip_ploss[6];
704
      offchip_parasitic_cc2 += offchip_ploss[6];
705
      offchip_parasitic_cc3 += offchip_ploss[6];
706
  } else if (current < 4) {
707
      offchip_parasitic_cc1 += offchip_ploss[7];
708
      offchip_parasitic_cc2 += offchip_ploss[7];
709
      offchip_parasitic_cc3 += offchip_ploss[7];
710
  } else if (current < 4.5) {
711
      offchip_parasitic_cc1 += offchip_ploss[8];
712
      offchip_parasitic_cc2 += offchip_ploss[8];
713
      offchip_parasitic_cc3 += offchip_ploss[8];
714
  } else if (current < 5) {
715
      offchip_parasitic_cc1 += offchip_ploss[9];
716
      offchip_parasitic_cc2 += offchip_ploss[9];
717
      offchip_parasitic_cc3 += offchip_ploss[9];
718
  } else if (current < 5.5) {
719
      offchip_parasitic_cc1 += offchip_ploss[10];
720
      offchip_parasitic_cc2 += offchip_ploss[10];
721
      offchip_parasitic_cc3 += offchip_ploss[10];
722
  } else if (current < 6) {
723
      offchip_parasitic_cc1 += offchip_ploss[11];
724
      offchip_parasitic_cc2 += offchip_ploss[11];
725
      offchip_parasitic_cc3 += offchip_ploss[11];
726
  } else if (current < 6.5) {
727
      offchip_parasitic_cc1 += offchip_ploss[12];
728
      offchip_parasitic_cc2 += offchip_ploss[12];
729
      offchip_parasitic_cc3 += offchip_ploss[12];
730
  } else if (current < 7) {
731
      offchip_parasitic_cc1 += offchip_ploss[13];
732
      offchip_parasitic_cc2 += offchip_ploss[13];
733
      offchip_parasitic_cc3 += offchip_ploss[13];
734
  } else if (current < 7.5) {
735
      offchip_parasitic_cc1 += offchip_ploss[14];
736
      offchip_parasitic_cc2 += offchip_ploss[14];
737
      offchip_parasitic_cc3 += offchip_ploss[14];
738
  } else if (current < 8) {
739
      offchip_parasitic_cc1 += offchip_ploss[15];
740
      offchip_parasitic_cc2 += offchip_ploss[15];
741
      offchip_parasitic_cc3 += offchip_ploss[15];
742
  } else if (current < 8.5) {
743
      offchip_parasitic_cc1 += offchip_ploss[16];
744
      offchip_parasitic_cc2 += offchip_ploss[16];
745
      offchip_parasitic_cc3 += offchip_ploss[16];
746
  } else if (current < 9) {
747
      offchip_parasitic_cc1 += offchip_ploss[17];
748
      offchip_parasitic_cc2 += offchip_ploss[17];
749
      offchip_parasitic_cc3 += offchip_ploss[17];
750
  } else if (current < 9.5) {
751
      offchip_parasitic_cc1 += offchip_ploss[18];
752
      offchip_parasitic_cc2 += offchip_ploss[18];
753
      offchip_parasitic_cc3 += offchip_ploss[18];
754
  } else if (current < 10) {
755
      offchip_parasitic_cc1 += offchip_ploss[19];
756
      offchip_parasitic_cc2 += offchip_ploss[19];
757
      offchip_parasitic_cc3 += offchip_ploss[19];
758
  } else if (current < 10.5) {
759
      offchip_parasitic_cc1 += offchip_ploss[20];
760
      offchip_parasitic_cc2 += offchip_ploss[20];
761
      offchip_parasitic_cc3 += offchip_ploss[20];
762
  } else if (current < 11) {
763
      offchip_parasitic_cc1 += offchip_ploss[21];
764
      offchip_parasitic_cc2 += offchip_ploss[21];
765
      offchip_parasitic_cc3 += offchip_ploss[21];
766
  } else if (current < 11.5) {
767
      offchip_parasitic_cc1 += offchip_ploss[22];
768
      offchip_parasitic_cc2 += offchip_ploss[22];
769
      offchip_parasitic_cc3 += offchip_ploss[22];
770
  } else if (current < 12) {
771
      offchip_parasitic_cc1 += offchip_ploss[23];
772
      offchip_parasitic_cc2 += offchip_ploss[23];
773
      offchip_parasitic_cc3 += offchip_ploss[23];
774
  } else if (current < 12.5) {
775
      offchip_parasitic_cc1 += offchip_ploss[24];
776
      offchip_parasitic_cc2 += offchip_ploss[24];
777
      offchip_parasitic_cc3 += offchip_ploss[24];
778
  } else if (current < 13) {
779
      offchip_parasitic_cc1 += offchip_ploss[25];
780
      offchip_parasitic_cc2 += offchip_ploss[25];
781
      offchip_parasitic_cc3 += offchip_ploss[25];
782
  } else {
783
      offchip_parasitic_cc1 += offchip_ploss[26];
784
      offchip_parasitic_cc2 += offchip_ploss[26];
785
      offchip_parasitic_cc3 += offchip_ploss[26];
786
  }
787

    
788
  offchip_parasitic_cc1 += pow(current, 2) * PARASITIC_OHM;
789
  offchip_parasitic_cc2 += pow(current, 2) * PARASITIC_OHM;
790
  offchip_parasitic_cc3 += pow(current, 2) * PARASITIC_OHM;
791

    
792
  // Onchip regulator paraisitc loss
793
  if (speed_grade == 0) {
794
      onchip_parasitic_cc1 += ONCHIP_VREG_LOSS_LOW;
795
      onchip_parasitic_cc2 += ONCHIP_VREG_LOSS_LOW;
796
      onchip_parasitic_cc3 += ONCHIP_VREG_LOSS_LOW;
797
  } else {
798
      onchip_parasitic_cc1 += ONCHIP_VREG_LOSS_HIGH;
799
      onchip_parasitic_cc2 += ONCHIP_VREG_LOSS_HIGH;
800
      onchip_parasitic_cc3 += ONCHIP_VREG_LOSS_HIGH;
801
  }
802
    
803
  total_parasitic_cc1 += onchip_parasitic_cc1 + offchip_parasitic_cc1;
804
  total_parasitic_cc2 += onchip_parasitic_cc2 + offchip_parasitic_cc2;
805
  total_parasitic_cc3 += onchip_parasitic_cc3 + offchip_parasitic_cc3;
806

    
807
  max_cycle_power_cc1 = MAX(max_cycle_power_cc1,current_total_cycle_power_cc1);
808
  max_cycle_power_cc2 = MAX(max_cycle_power_cc2,current_total_cycle_power_cc2);
809
  max_cycle_power_cc3 = MAX(max_cycle_power_cc3,current_total_cycle_power_cc3);
810

    
811
  last_single_total_cycle_power_cc1 = total_cycle_power_cc1;
812
  last_single_total_cycle_power_cc2 = total_cycle_power_cc2;
813
  last_single_total_cycle_power_cc3 = total_cycle_power_cc3;
814

    
815
  cycle_count++;
816

    
817
  // here's where we change VFI levels
818
  diff_dispatch = sim_total_insn - last_sim_total_insn;
819
  diff_commit = sim_num_insn - last_sim_num_insn;
820
  
821
  diff_dispatch_sum += diff_dispatch;
822
  diff_commit_sum += diff_commit;
823

    
824
  hist_dispatch[hist_idx] = diff_dispatch;
825
  hist_commit[hist_idx] = diff_commit;
826
  hist_idx++;
827
  if(hist_idx >= SUM_OVER) {
828
    hist_idx = 0;
829
  }
830

    
831
  if(init_count >= SUM_OVER) {
832
      // Update speed
833
    speed_grade = speed_delay[SWITCH_CYCLES - 1];
834
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES-1; speed_idx++) {
835

    
836
        speed_delay[speed_idx+1] = speed_delay[speed_idx];
837
    }
838

    
839
    diff_dispatch_sum -= hist_dispatch[hist_idx];
840
    diff_commit_sum -= hist_commit[hist_idx];
841

    
842
    if( diff_commit_sum < diff_dispatch_sum ) {
843
        speed_delay[0] = 0;
844
    }
845
    else if( diff_commit_sum >= diff_dispatch_sum ) {
846
        speed_delay[0] = 1;
847
    }
848

    
849
    if(speed_grade == 0) {
850
        slow_cycles++;
851
    }
852
    else {
853
        fast_cycles++;
854
    }
855

    
856
  } else {
857
    init_count++;
858
    fast_cycles++;
859

    
860
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES; speed_idx++) {
861
        speed_delay[speed_idx] = 1;
862
    }
863
  }
864

    
865
//  if (diff_commit <= diff_dispatch) {
866
//      speed_grade = 0;
867
//  } else if (diff_commit > diff_dispatch) {
868
//      speed_grade = 1;
869
//  }
870

    
871
  if ((speed_grade == 0) && (last_speed_grade == 1)) {
872
      Mhz = Mhz / 2;
873
      Vdd = Vdd / 2;
874
      printf("Speed down!\n");
875
      last_switch_time = cycle_count;
876
  } else if ((speed_grade == 1) && (last_speed_grade == 0)) {
877
      Mhz = Mhz * 2;
878
      Vdd = Vdd * 2;
879
      printf("Speed up!\n");
880
      last_switch_time = cycle_count;
881
  }
882
#ifdef DVFS_FIX
883
  else if (last_switch_time < cycle_count-(SUM_OVER/3) && speed_grade==0 ) {
884
      speed_grade = 1;
885
      Mhz = Mhz * 2;
886
      Vdd = Vdd * 2;
887
      init_count = 0;
888
      last_switch_time = cycle_count;
889
      hist_idx = 0;
890
      diff_commit_sum = 0;
891
      diff_dispatch_sum = 0;
892
  }
893
#endif
894
      //printf("Vdd = %f, MHz = %f\n",Vdd,Mhz);
895

    
896
  if (speed_grade != last_speed_grade) {
897
    Period = 1/Mhz;
898
    SensePowerfactor3 = Mhz * Vbitsense * Vbitsense;
899
    SensePowerfactor2 = Mhz * (Vbitpre - Vbitsense) * (Vbitpre - Vbitsense);
900
    SensePowerfactor = (Mhz) * (Vdd/2) * (Vdd/2);
901
    Powerfactor = (Mhz) * (Vdd) * (Vdd);
902
    Sense2Powerfactor = Mhz * (2 * .3 + .1 * Vdd);
903
    LowSwingPowerfactor = Mhz * .2 * .2;
904
      calculate_power(&power);
905
  }
906

    
907
  last_speed_grade = speed_grade;
908

    
909
  // Update
910
  last_sim_num_insn  = sim_num_insn;
911
  last_sim_total_insn = sim_total_insn;
912

    
913
}
914

    
915
void
916
power_reg_stats(struct stat_sdb_t *sdb)        /* stats database */
917
{
918
  stat_reg_double(sdb, "rename_power", "total power usage of rename unit", &rename_power, 0, NULL);
919

    
920
  stat_reg_double(sdb, "bpred_power", "total power usage of bpred unit", &bpred_power, 0, NULL);
921

    
922
  stat_reg_double(sdb, "window_power", "total power usage of instruction window", &window_power, 0, NULL);
923

    
924
  stat_reg_double(sdb, "lsq_power", "total power usage of load/store queue", &lsq_power, 0, NULL);
925

    
926
  stat_reg_double(sdb, "regfile_power", "total power usage of arch. regfile", &regfile_power, 0, NULL);
927

    
928
  stat_reg_double(sdb, "icache_power", "total power usage of icache", &icache_power, 0, NULL);
929

    
930
  stat_reg_double(sdb, "dcache_power", "total power usage of dcache", &dcache_power, 0, NULL);
931

    
932
  stat_reg_double(sdb, "dcache2_power", "total power usage of dcache2", &dcache2_power, 0, NULL);
933

    
934
  stat_reg_double(sdb, "alu_power", "total power usage of alu", &alu_power, 0, NULL);
935

    
936
  stat_reg_double(sdb, "falu_power", "total power usage of falu", &falu_power, 0, NULL);
937

    
938
  stat_reg_double(sdb, "resultbus_power", "total power usage of resultbus", &resultbus_power, 0, NULL);
939

    
940
  stat_reg_double(sdb, "clock_power", "total power usage of clock", &clock_power, 0, NULL);
941

    
942
  stat_reg_formula(sdb, "avg_rename_power", "avg power usage of rename unit", "rename_power/sim_cycle", NULL);
943

    
944
  stat_reg_formula(sdb, "avg_bpred_power", "avg power usage of bpred unit", "bpred_power/sim_cycle", NULL);
945

    
946
  stat_reg_formula(sdb, "avg_window_power", "avg power usage of instruction window", "window_power/sim_cycle",  NULL);
947

    
948
  stat_reg_formula(sdb, "avg_lsq_power", "avg power usage of lsq", "lsq_power/sim_cycle",  NULL);
949

    
950
  stat_reg_formula(sdb, "avg_regfile_power", "avg power usage of arch. regfile", "regfile_power/sim_cycle",  NULL);
951

    
952
  stat_reg_formula(sdb, "avg_icache_power", "avg power usage of icache", "icache_power/sim_cycle",  NULL);
953

    
954
  stat_reg_formula(sdb, "avg_dcache_power", "avg power usage of dcache", "dcache_power/sim_cycle",  NULL);
955

    
956
  stat_reg_formula(sdb, "avg_dcache2_power", "avg power usage of dcache2", "dcache2_power/sim_cycle",  NULL);
957

    
958
  stat_reg_formula(sdb, "avg_alu_power", "avg power usage of alu", "alu_power/sim_cycle",  NULL);
959

    
960
  stat_reg_formula(sdb, "avg_falu_power", "avg power usage of falu", "falu_power/sim_cycle",  NULL);
961

    
962
  stat_reg_formula(sdb, "avg_resultbus_power", "avg power usage of resultbus", "resultbus_power/sim_cycle",  NULL);
963

    
964
  stat_reg_formula(sdb, "avg_clock_power", "avg power usage of clock", "clock_power/sim_cycle",  NULL);
965

    
966
  stat_reg_formula(sdb, "fetch_stage_power", "total power usage of fetch stage", "icache_power + bpred_power", NULL);
967

    
968
  stat_reg_formula(sdb, "dispatch_stage_power", "total power usage of dispatch stage", "rename_power", NULL);
969

    
970
  stat_reg_formula(sdb, "issue_stage_power", "total power usage of issue stage", "resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power", NULL);
971

    
972
  stat_reg_formula(sdb, "avg_fetch_power", "average power of fetch unit per cycle", "(icache_power + bpred_power)/ sim_cycle", /* format */NULL);
973

    
974
  stat_reg_formula(sdb, "avg_dispatch_power", "average power of dispatch unit per cycle", "(rename_power)/ sim_cycle", /* format */NULL);
975

    
976
  stat_reg_formula(sdb, "avg_issue_power", "average power of issue unit per cycle", "(resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power)/ sim_cycle", /* format */NULL);
977

    
978
  stat_reg_formula(sdb, "total_power", "total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power  + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)", NULL);
979

    
980
  stat_reg_formula(sdb, "avg_total_power_cycle", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_cycle", NULL);
981

    
982
  stat_reg_formula(sdb, "avg_total_power_cycle_nofp_nod2", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_cycle", NULL);
983

    
984
  stat_reg_formula(sdb, "avg_total_power_insn", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_total_insn", NULL);
985

    
986
  stat_reg_formula(sdb, "avg_total_power_insn_nofp_nod2", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_total_insn", NULL);
987

    
988
  stat_reg_double(sdb, "rename_power_cc1", "total power usage of rename unit_cc1", &rename_power_cc1, 0, NULL);
989

    
990
  stat_reg_double(sdb, "bpred_power_cc1", "total power usage of bpred unit_cc1", &bpred_power_cc1, 0, NULL);
991

    
992
  stat_reg_double(sdb, "window_power_cc1", "total power usage of instruction window_cc1", &window_power_cc1, 0, NULL);
993

    
994
  stat_reg_double(sdb, "lsq_power_cc1", "total power usage of lsq_cc1", &lsq_power_cc1, 0, NULL);
995

    
996
  stat_reg_double(sdb, "regfile_power_cc1", "total power usage of arch. regfile_cc1", &regfile_power_cc1, 0, NULL);
997

    
998
  stat_reg_double(sdb, "icache_power_cc1", "total power usage of icache_cc1", &icache_power_cc1, 0, NULL);
999

    
1000
  stat_reg_double(sdb, "dcache_power_cc1", "total power usage of dcache_cc1", &dcache_power_cc1, 0, NULL);
1001

    
1002
  stat_reg_double(sdb, "dcache2_power_cc1", "total power usage of dcache2_cc1", &dcache2_power_cc1, 0, NULL);
1003

    
1004
  stat_reg_double(sdb, "alu_power_cc1", "total power usage of alu_cc1", &alu_power_cc1, 0, NULL);
1005

    
1006
  stat_reg_double(sdb, "resultbus_power_cc1", "total power usage of resultbus_cc1", &resultbus_power_cc1, 0, NULL);
1007

    
1008
  stat_reg_double(sdb, "clock_power_cc1", "total power usage of clock_cc1", &clock_power_cc1, 0, NULL);
1009

    
1010
  stat_reg_formula(sdb, "avg_rename_power_cc1", "avg power usage of rename unit_cc1", "rename_power_cc1/sim_cycle", NULL);
1011

    
1012
  stat_reg_formula(sdb, "avg_bpred_power_cc1", "avg power usage of bpred unit_cc1", "bpred_power_cc1/sim_cycle", NULL);
1013

    
1014
  stat_reg_formula(sdb, "avg_window_power_cc1", "avg power usage of instruction window_cc1", "window_power_cc1/sim_cycle",  NULL);
1015

    
1016
  stat_reg_formula(sdb, "avg_lsq_power_cc1", "avg power usage of lsq_cc1", "lsq_power_cc1/sim_cycle",  NULL);
1017

    
1018
  stat_reg_formula(sdb, "avg_regfile_power_cc1", "avg power usage of arch. regfile_cc1", "regfile_power_cc1/sim_cycle",  NULL);
1019

    
1020
  stat_reg_formula(sdb, "avg_icache_power_cc1", "avg power usage of icache_cc1", "icache_power_cc1/sim_cycle",  NULL);
1021

    
1022
  stat_reg_formula(sdb, "avg_dcache_power_cc1", "avg power usage of dcache_cc1", "dcache_power_cc1/sim_cycle",  NULL);
1023

    
1024
  stat_reg_formula(sdb, "avg_dcache2_power_cc1", "avg power usage of dcache2_cc1", "dcache2_power_cc1/sim_cycle",  NULL);
1025

    
1026
  stat_reg_formula(sdb, "avg_alu_power_cc1", "avg power usage of alu_cc1", "alu_power_cc1/sim_cycle",  NULL);
1027

    
1028
  stat_reg_formula(sdb, "avg_resultbus_power_cc1", "avg power usage of resultbus_cc1", "resultbus_power_cc1/sim_cycle",  NULL);
1029

    
1030
  stat_reg_formula(sdb, "avg_clock_power_cc1", "avg power usage of clock_cc1", "clock_power_cc1/sim_cycle",  NULL);
1031

    
1032
  stat_reg_formula(sdb, "fetch_stage_power_cc1", "total power usage of fetch stage_cc1", "icache_power_cc1 + bpred_power_cc1", NULL);
1033

    
1034
  stat_reg_formula(sdb, "dispatch_stage_power_cc1", "total power usage of dispatch stage_cc1", "rename_power_cc1", NULL);
1035

    
1036
  stat_reg_formula(sdb, "issue_stage_power_cc1", "total power usage of issue stage_cc1", "resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1", NULL);
1037

    
1038
  stat_reg_formula(sdb, "avg_fetch_power_cc1", "average power of fetch unit per cycle_cc1", "(icache_power_cc1 + bpred_power_cc1)/ sim_cycle", /* format */NULL);
1039

    
1040
  stat_reg_formula(sdb, "avg_dispatch_power_cc1", "average power of dispatch unit per cycle_cc1", "(rename_power_cc1)/ sim_cycle", /* format */NULL);
1041

    
1042
  stat_reg_formula(sdb, "avg_issue_power_cc1", "average power of issue unit per cycle_cc1", "(resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1)/ sim_cycle", /* format */NULL);
1043

    
1044
  stat_reg_formula(sdb, "total_power_cycle_cc1", "total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)", NULL);
1045

    
1046
  stat_reg_formula(sdb, "avg_total_power_cycle_cc1", "average total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 +dcache2_power_cc1)/sim_cycle", NULL);
1047

    
1048
  stat_reg_formula(sdb, "avg_total_power_insn_cc1", "average total power per insn_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 +  alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)/sim_total_insn", NULL);
1049

    
1050
  stat_reg_double(sdb, "rename_power_cc2", "total power usage of rename unit_cc2", &rename_power_cc2, 0, NULL);
1051

    
1052
  stat_reg_double(sdb, "bpred_power_cc2", "total power usage of bpred unit_cc2", &bpred_power_cc2, 0, NULL);
1053

    
1054
  stat_reg_double(sdb, "window_power_cc2", "total power usage of instruction window_cc2", &window_power_cc2, 0, NULL);
1055

    
1056
  stat_reg_double(sdb, "lsq_power_cc2", "total power usage of lsq_cc2", &lsq_power_cc2, 0, NULL);
1057

    
1058
  stat_reg_double(sdb, "regfile_power_cc2", "total power usage of arch. regfile_cc2", &regfile_power_cc2, 0, NULL);
1059

    
1060
  stat_reg_double(sdb, "icache_power_cc2", "total power usage of icache_cc2", &icache_power_cc2, 0, NULL);
1061

    
1062
  stat_reg_double(sdb, "dcache_power_cc2", "total power usage of dcache_cc2", &dcache_power_cc2, 0, NULL);
1063

    
1064
  stat_reg_double(sdb, "dcache2_power_cc2", "total power usage of dcache2_cc2", &dcache2_power_cc2, 0, NULL);
1065

    
1066
  stat_reg_double(sdb, "alu_power_cc2", "total power usage of alu_cc2", &alu_power_cc2, 0, NULL);
1067

    
1068
  stat_reg_double(sdb, "resultbus_power_cc2", "total power usage of resultbus_cc2", &resultbus_power_cc2, 0, NULL);
1069

    
1070
  stat_reg_double(sdb, "clock_power_cc2", "total power usage of clock_cc2", &clock_power_cc2, 0, NULL);
1071

    
1072
  stat_reg_formula(sdb, "avg_rename_power_cc2", "avg power usage of rename unit_cc2", "rename_power_cc2/sim_cycle", NULL);
1073

    
1074
  stat_reg_formula(sdb, "avg_bpred_power_cc2", "avg power usage of bpred unit_cc2", "bpred_power_cc2/sim_cycle", NULL);
1075

    
1076
  stat_reg_formula(sdb, "avg_window_power_cc2", "avg power usage of instruction window_cc2", "window_power_cc2/sim_cycle",  NULL);
1077

    
1078
  stat_reg_formula(sdb, "avg_lsq_power_cc2", "avg power usage of instruction lsq_cc2", "lsq_power_cc2/sim_cycle",  NULL);
1079

    
1080
  stat_reg_formula(sdb, "avg_regfile_power_cc2", "avg power usage of arch. regfile_cc2", "regfile_power_cc2/sim_cycle",  NULL);
1081

    
1082
  stat_reg_formula(sdb, "avg_icache_power_cc2", "avg power usage of icache_cc2", "icache_power_cc2/sim_cycle",  NULL);
1083

    
1084
  stat_reg_formula(sdb, "avg_dcache_power_cc2", "avg power usage of dcache_cc2", "dcache_power_cc2/sim_cycle",  NULL);
1085

    
1086
  stat_reg_formula(sdb, "avg_dcache2_power_cc2", "avg power usage of dcache2_cc2", "dcache2_power_cc2/sim_cycle",  NULL);
1087

    
1088
  stat_reg_formula(sdb, "avg_alu_power_cc2", "avg power usage of alu_cc2", "alu_power_cc2/sim_cycle",  NULL);
1089

    
1090
  stat_reg_formula(sdb, "avg_resultbus_power_cc2", "avg power usage of resultbus_cc2", "resultbus_power_cc2/sim_cycle",  NULL);
1091

    
1092
  stat_reg_formula(sdb, "avg_clock_power_cc2", "avg power usage of clock_cc2", "clock_power_cc2/sim_cycle",  NULL);
1093

    
1094
  stat_reg_formula(sdb, "fetch_stage_power_cc2", "total power usage of fetch stage_cc2", "icache_power_cc2 + bpred_power_cc2", NULL);
1095

    
1096
  stat_reg_formula(sdb, "dispatch_stage_power_cc2", "total power usage of dispatch stage_cc2", "rename_power_cc2", NULL);
1097

    
1098
  stat_reg_formula(sdb, "issue_stage_power_cc2", "total power usage of issue stage_cc2", "resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2", NULL);
1099

    
1100
  stat_reg_formula(sdb, "avg_fetch_power_cc2", "average power of fetch unit per cycle_cc2", "(icache_power_cc2 + bpred_power_cc2)/ sim_cycle", /* format */NULL);
1101

    
1102
  stat_reg_formula(sdb, "avg_dispatch_power_cc2", "average power of dispatch unit per cycle_cc2", "(rename_power_cc2)/ sim_cycle", /* format */NULL);
1103

    
1104
  stat_reg_formula(sdb, "avg_issue_power_cc2", "average power of issue unit per cycle_cc2", "(resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2)/ sim_cycle", /* format */NULL);
1105

    
1106
  stat_reg_formula(sdb, "total_power_cycle_cc2", "total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)", NULL);
1107

    
1108
  stat_reg_formula(sdb, "avg_total_power_cycle_cc2", "average total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_cycle", NULL);
1109

    
1110
  stat_reg_formula(sdb, "avg_total_power_insn_cc2", "average total power per insn_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_total_insn", NULL);
1111

    
1112
  stat_reg_double(sdb, "rename_power_cc3", "total power usage of rename unit_cc3", &rename_power_cc3, 0, NULL);
1113

    
1114
  stat_reg_double(sdb, "bpred_power_cc3", "total power usage of bpred unit_cc3", &bpred_power_cc3, 0, NULL);
1115

    
1116
  stat_reg_double(sdb, "window_power_cc3", "total power usage of instruction window_cc3", &window_power_cc3, 0, NULL);
1117

    
1118
  stat_reg_double(sdb, "lsq_power_cc3", "total power usage of lsq_cc3", &lsq_power_cc3, 0, NULL);
1119

    
1120
  stat_reg_double(sdb, "regfile_power_cc3", "total power usage of arch. regfile_cc3", &regfile_power_cc3, 0, NULL);
1121

    
1122
  stat_reg_double(sdb, "icache_power_cc3", "total power usage of icache_cc3", &icache_power_cc3, 0, NULL);
1123

    
1124
  stat_reg_double(sdb, "dcache_power_cc3", "total power usage of dcache_cc3", &dcache_power_cc3, 0, NULL);
1125

    
1126
  stat_reg_double(sdb, "dcache2_power_cc3", "total power usage of dcache2_cc3", &dcache2_power_cc3, 0, NULL);
1127

    
1128
  stat_reg_double(sdb, "alu_power_cc3", "total power usage of alu_cc3", &alu_power_cc3, 0, NULL);
1129

    
1130
  stat_reg_double(sdb, "resultbus_power_cc3", "total power usage of resultbus_cc3", &resultbus_power_cc3, 0, NULL);
1131

    
1132
  stat_reg_double(sdb, "clock_power_cc3", "total power usage of clock_cc3", &clock_power_cc3, 0, NULL);
1133

    
1134
  stat_reg_formula(sdb, "avg_rename_power_cc3", "avg power usage of rename unit_cc3", "rename_power_cc3/sim_cycle", NULL);
1135

    
1136
  stat_reg_formula(sdb, "avg_bpred_power_cc3", "avg power usage of bpred unit_cc3", "bpred_power_cc3/sim_cycle", NULL);
1137

    
1138
  stat_reg_formula(sdb, "avg_window_power_cc3", "avg power usage of instruction window_cc3", "window_power_cc3/sim_cycle",  NULL);
1139

    
1140
  stat_reg_formula(sdb, "avg_lsq_power_cc3", "avg power usage of instruction lsq_cc3", "lsq_power_cc3/sim_cycle",  NULL);
1141

    
1142
  stat_reg_formula(sdb, "avg_regfile_power_cc3", "avg power usage of arch. regfile_cc3", "regfile_power_cc3/sim_cycle",  NULL);
1143

    
1144
  stat_reg_formula(sdb, "avg_icache_power_cc3", "avg power usage of icache_cc3", "icache_power_cc3/sim_cycle",  NULL);
1145

    
1146
  stat_reg_formula(sdb, "avg_dcache_power_cc3", "avg power usage of dcache_cc3", "dcache_power_cc3/sim_cycle",  NULL);
1147

    
1148
  stat_reg_formula(sdb, "avg_dcache2_power_cc3", "avg power usage of dcache2_cc3", "dcache2_power_cc3/sim_cycle",  NULL);
1149

    
1150
  stat_reg_formula(sdb, "avg_alu_power_cc3", "avg power usage of alu_cc3", "alu_power_cc3/sim_cycle",  NULL);
1151

    
1152
  stat_reg_formula(sdb, "avg_resultbus_power_cc3", "avg power usage of resultbus_cc3", "resultbus_power_cc3/sim_cycle",  NULL);
1153

    
1154
  stat_reg_formula(sdb, "avg_clock_power_cc3", "avg power usage of clock_cc3", "clock_power_cc3/sim_cycle",  NULL);
1155

    
1156
  stat_reg_formula(sdb, "fetch_stage_power_cc3", "total power usage of fetch stage_cc3", "icache_power_cc3 + bpred_power_cc3", NULL);
1157

    
1158
  stat_reg_formula(sdb, "dispatch_stage_power_cc3", "total power usage of dispatch stage_cc3", "rename_power_cc3", NULL);
1159

    
1160
  stat_reg_formula(sdb, "issue_stage_power_cc3", "total power usage of issue stage_cc3", "resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3", NULL);
1161

    
1162
  stat_reg_formula(sdb, "avg_fetch_power_cc3", "average power of fetch unit per cycle_cc3", "(icache_power_cc3 + bpred_power_cc3)/ sim_cycle", /* format */NULL);
1163

    
1164
  stat_reg_formula(sdb, "avg_dispatch_power_cc3", "average power of dispatch unit per cycle_cc3", "(rename_power_cc3)/ sim_cycle", /* format */NULL);
1165

    
1166
  stat_reg_formula(sdb, "avg_issue_power_cc3", "average power of issue unit per cycle_cc3", "(resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3)/ sim_cycle", /* format */NULL);
1167

    
1168
  stat_reg_formula(sdb, "total_power_cycle_cc3", "total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)", NULL);
1169

    
1170
  stat_reg_formula(sdb, "avg_total_power_cycle_cc3", "average total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_cycle", NULL);
1171

    
1172
  stat_reg_formula(sdb, "avg_total_power_insn_cc3", "average total power per insn_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_total_insn", NULL);
1173

    
1174
  stat_reg_counter(sdb, "total_rename_access", "total number accesses of rename unit", &total_rename_access, 0, NULL);
1175

    
1176
  stat_reg_counter(sdb, "total_bpred_access", "total number accesses of bpred unit", &total_bpred_access, 0, NULL);
1177

    
1178
  stat_reg_counter(sdb, "total_window_access", "total number accesses of instruction window", &total_window_access, 0, NULL);
1179

    
1180
  stat_reg_counter(sdb, "total_lsq_access", "total number accesses of load/store queue", &total_lsq_access, 0, NULL);
1181

    
1182
  stat_reg_counter(sdb, "total_regfile_access", "total number accesses of arch. regfile", &total_regfile_access, 0, NULL);
1183

    
1184
  stat_reg_counter(sdb, "total_icache_access", "total number accesses of icache", &total_icache_access, 0, NULL);
1185

    
1186
  stat_reg_counter(sdb, "total_dcache_access", "total number accesses of dcache", &total_dcache_access, 0, NULL);
1187

    
1188
  stat_reg_counter(sdb, "total_dcache2_access", "total number accesses of dcache2", &total_dcache2_access, 0, NULL);
1189

    
1190
  stat_reg_counter(sdb, "total_alu_access", "total number accesses of alu", &total_alu_access, 0, NULL);
1191

    
1192
  stat_reg_counter(sdb, "total_resultbus_access", "total number accesses of resultbus", &total_resultbus_access, 0, NULL);
1193

    
1194
  stat_reg_formula(sdb, "avg_rename_access", "avg number accesses of rename unit", "total_rename_access/sim_cycle", NULL);
1195

    
1196
  stat_reg_formula(sdb, "avg_bpred_access", "avg number accesses of bpred unit", "total_bpred_access/sim_cycle", NULL);
1197

    
1198
  stat_reg_formula(sdb, "avg_window_access", "avg number accesses of instruction window", "total_window_access/sim_cycle",  NULL);
1199

    
1200
  stat_reg_formula(sdb, "avg_lsq_access", "avg number accesses of lsq", "total_lsq_access/sim_cycle",  NULL);
1201

    
1202
  stat_reg_formula(sdb, "avg_regfile_access", "avg number accesses of arch. regfile", "total_regfile_access/sim_cycle",  NULL);
1203

    
1204
  stat_reg_formula(sdb, "avg_icache_access", "avg number accesses of icache", "total_icache_access/sim_cycle",  NULL);
1205

    
1206
  stat_reg_formula(sdb, "avg_dcache_access", "avg number accesses of dcache", "total_dcache_access/sim_cycle",  NULL);
1207

    
1208
  stat_reg_formula(sdb, "avg_dcache2_access", "avg number accesses of dcache2", "total_dcache2_access/sim_cycle",  NULL);
1209

    
1210
  stat_reg_formula(sdb, "avg_alu_access", "avg number accesses of alu", "total_alu_access/sim_cycle",  NULL);
1211

    
1212
  stat_reg_formula(sdb, "avg_resultbus_access", "avg number accesses of resultbus", "total_resultbus_access/sim_cycle",  NULL);
1213

    
1214
  stat_reg_counter(sdb, "max_rename_access", "max number accesses of rename unit", &max_rename_access, 0, NULL);
1215

    
1216
  stat_reg_counter(sdb, "max_bpred_access", "max number accesses of bpred unit", &max_bpred_access, 0, NULL);
1217

    
1218
  stat_reg_counter(sdb, "max_window_access", "max number accesses of instruction window", &max_window_access, 0, NULL);
1219

    
1220
  stat_reg_counter(sdb, "max_lsq_access", "max number accesses of load/store queue", &max_lsq_access, 0, NULL);
1221

    
1222
  stat_reg_counter(sdb, "max_regfile_access", "max number accesses of arch. regfile", &max_regfile_access, 0, NULL);
1223

    
1224
  stat_reg_counter(sdb, "max_icache_access", "max number accesses of icache", &max_icache_access, 0, NULL);
1225

    
1226
  stat_reg_counter(sdb, "max_dcache_access", "max number accesses of dcache", &max_dcache_access, 0, NULL);
1227

    
1228
  stat_reg_counter(sdb, "max_dcache2_access", "max number accesses of dcache2", &max_dcache2_access, 0, NULL);
1229

    
1230
  stat_reg_counter(sdb, "max_alu_access", "max number accesses of alu", &max_alu_access, 0, NULL);
1231

    
1232
  stat_reg_counter(sdb, "max_resultbus_access", "max number accesses of resultbus", &max_resultbus_access, 0, NULL);
1233

    
1234
  stat_reg_double(sdb, "max_cycle_power_cc1", "maximum cycle power usage of cc1", &max_cycle_power_cc1, 0, NULL);
1235

    
1236
  stat_reg_double(sdb, "max_cycle_power_cc2", "maximum cycle power usage of cc2", &max_cycle_power_cc2, 0, NULL);
1237

    
1238
  stat_reg_double(sdb, "max_cycle_power_cc3", "maximum cycle power usage of cc3", &max_cycle_power_cc3, 0, NULL);
1239

    
1240
  stat_reg_double(sdb, "parasitic_power_cc3", "total parasitic power cc3", &total_parasitic_cc3, 0, NULL);
1241
  stat_reg_double(sdb, "onchip parasitic_power_cc3", "onchip parasitic power cc3", &onchip_parasitic_cc3, 0, NULL);
1242
  stat_reg_double(sdb, "offchip parasitic_power_cc3", "offchip parasitic power cc3", &offchip_parasitic_cc3, 0, NULL);
1243
  stat_reg_double(sdb, "min amperage", "min amperage", &min_amp, 0, NULL);
1244
  stat_reg_double(sdb, "max amperage", "max amperage", &max_amp, 0, NULL);
1245
  stat_reg_double(sdb, "slow_cycles", "slow cycles", &slow_cycles, 0, NULL);
1246
  stat_reg_double(sdb, "fast_cycles", "fast cycles", &fast_cycles, 0, NULL);
1247
}
1248

    
1249

    
1250
/*
 * squarify - make an array structure closer to square.
 *
 * This routine takes the number of rows and cols of an array structure
 * and attempts to make it more of a reasonable circuit structure by
 * trying to make the number of rows and cols as close as possible
 * (scaling both by factors of 2 in opposite directions).  It returns a
 * scale factor, which is the amount that the rows should be divided by
 * and the columns should be multiplied by.
 *
 * Only the rows > cols case is rebalanced; for rows <= cols the
 * function returns 1 (no scaling).  The result is always a power of
 * two.
 *
 * The power of two is produced with an integer shift rather than
 * (int)pow(2.0, n): casting a floating-point result that lands just
 * below the exact value would truncate to the wrong integer.
 *
 * A negative cols can never satisfy the termination test and would run
 * into signed overflow while doubling, so it is rejected up front and
 * reported as "no scaling".
 */
int squarify(int rows, int cols)
{
  int shift = 1;   /* log2 of the scale factor returned on this pass */

  /* Nothing to rebalance (or invalid column count): no scaling. */
  if (rows <= cols || cols < 0)
    return 1;

  for (;;) {
    rows /= 2;
    cols *= 2;

    /* Stop as soon as one more halving would not leave rows above
       cols; the current shift is the answer. */
    if (rows / 2 <= cols)
      return 1 << shift;

    shift++;
  }
}
1286

    
1287
/* could improve squarify to work when rows < cols */
1288

    
1289
/*
 * squarify_new - two-sided variant of squarify() returning a double.
 *
 * Repeatedly halves the larger of rows/cols and doubles the smaller
 * until they meet or cross, and returns 2^k where k is the number of
 * such steps taken beyond the first one:
 *   - rows == cols: returns 1.0;
 *   - rows >  cols: k counts up, so the result is >= 1.0;
 *   - rows <  cols: k counts down, so the result is <= 1.0.
 * A single step that already balances the array therefore yields 1.0.
 *
 * The factor is carried as a running product (x2 / x0.5 per step),
 * which is exact for powers of two.
 */
double squarify_new(int rows, int cols)
{
  double factor = 1.0;   /* 2^k for the current step count k */

  if (rows == cols)
    return factor;

  if (rows > cols) {
    /* Too tall: fold rows into columns. */
    for (;;) {
      rows /= 2;
      cols *= 2;
      if (rows <= cols)
        return factor;
      factor *= 2.0;
    }
  }

  /* Too wide: fold columns into rows. */
  for (;;) {
    rows *= 2;
    cols /= 2;
    if (cols <= rows)
      return factor;
    factor *= 0.5;
  }
}
1315

    
1316
void dump_power_stats(power)
1317
     power_result_type *power;
1318
{
1319
  double total_power;
1320
  double bpred_power;
1321
  double rename_power;
1322
  double rat_power;
1323
  double dcl_power;
1324
  double lsq_power;
1325
  double window_power;
1326
  double wakeup_power;
1327
  double rs_power;
1328
  double lsq_wakeup_power;
1329
  double lsq_rs_power;
1330
  double regfile_power;
1331
  double reorder_power;
1332
  double icache_power;
1333
  double dcache_power;
1334
  double dcache2_power;
1335
  double dtlb_power;
1336
  double itlb_power;
1337
  double ambient_power = 2.0;
1338

    
1339
  icache_power = power->icache_power;
1340

    
1341
  dcache_power = power->dcache_power;
1342

    
1343
  dcache2_power = power->dcache2_power;
1344

    
1345
  itlb_power = power->itlb;
1346
  dtlb_power = power->dtlb;
1347

    
1348
  bpred_power = power->btb + power->local_predict + power->global_predict + 
1349
    power->chooser + power->ras;
1350

    
1351
  rat_power = power->rat_decoder + 
1352
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
1353

    
1354
  dcl_power = power->dcl_compare + power->dcl_pencode;
1355

    
1356
  rename_power = power->rat_power + power->dcl_power + power->inst_decoder_power;
1357

    
1358
  wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
1359
    power->wakeup_ormatch;
1360
   
1361
  rs_power = power->rs_decoder + 
1362
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
1363

    
1364
  window_power = wakeup_power + rs_power + power->selection;
1365

    
1366
  lsq_rs_power = power->lsq_rs_decoder + 
1367
    power->lsq_rs_wordline + power->lsq_rs_bitline + power->lsq_rs_senseamp;
1368

    
1369
  lsq_wakeup_power = power->lsq_wakeup_tagdrive + 
1370
    power->lsq_wakeup_tagmatch + power->lsq_wakeup_ormatch;
1371

    
1372
  lsq_power = lsq_wakeup_power + lsq_rs_power;
1373

    
1374
  reorder_power = power->reorder_decoder + 
1375
    power->reorder_wordline + power->reorder_bitline + 
1376
    power->reorder_senseamp;
1377

    
1378
  regfile_power = power->regfile_decoder + 
1379
    power->regfile_wordline + power->regfile_bitline + 
1380
    power->regfile_senseamp;
1381

    
1382
  total_power = bpred_power + rename_power + window_power + regfile_power +
1383
    power->resultbus + lsq_power + 
1384
    icache_power + dcache_power + dcache2_power + 
1385
    dtlb_power + itlb_power + power->clock_power + power->ialu_power +
1386
    power->falu_power;
1387

    
1388
  fprintf(stderr,"\nProcessor Parameters:\n");
1389
  fprintf(stderr,"Issue Width: %d\n",ruu_issue_width);
1390
  fprintf(stderr,"Window Size: %d\n",RUU_size);
1391
  fprintf(stderr,"Number of Virtual Registers: %d\n",MD_NUM_IREGS);
1392
  fprintf(stderr,"Number of Physical Registers: %d\n",RUU_size);
1393
  fprintf(stderr,"Datapath Width: %d\n",data_width);
1394

    
1395
  fprintf(stderr,"Total Power Consumption: %g\n",total_power+ambient_power);
1396
  fprintf(stderr,"Branch Predictor Power Consumption: %g  (%.3g%%)\n",bpred_power,100*bpred_power/total_power);
1397
  fprintf(stderr," branch target buffer power (W): %g\n",power->btb);
1398
  fprintf(stderr," local predict power (W): %g\n",power->local_predict);
1399
  fprintf(stderr," global predict power (W): %g\n",power->global_predict);
1400
  fprintf(stderr," chooser power (W): %g\n",power->chooser);
1401
  fprintf(stderr," RAS power (W): %g\n",power->ras);
1402
  fprintf(stderr,"Rename Logic Power Consumption: %g  (%.3g%%)\n",rename_power,100*rename_power/total_power);
1403
  fprintf(stderr," Instruction Decode Power (W): %g\n",power->inst_decoder_power);
1404
  fprintf(stderr," RAT decode_power (W): %g\n",power->rat_decoder);
1405
  fprintf(stderr," RAT wordline_power (W): %g\n",power->rat_wordline);
1406
  fprintf(stderr," RAT bitline_power (W): %g\n",power->rat_bitline);
1407
  fprintf(stderr," DCL Comparators (W): %g\n",power->dcl_compare);
1408
  fprintf(stderr,"Instruction Window Power Consumption: %g  (%.3g%%)\n",window_power,100*window_power/total_power);
1409
  fprintf(stderr," tagdrive (W): %g\n",power->wakeup_tagdrive);
1410
  fprintf(stderr," tagmatch (W): %g\n",power->wakeup_tagmatch);
1411
  fprintf(stderr," Selection Logic (W): %g\n",power->selection);
1412
  fprintf(stderr," decode_power (W): %g\n",power->rs_decoder);
1413
  fprintf(stderr," wordline_power (W): %g\n",power->rs_wordline);
1414
  fprintf(stderr," bitline_power (W): %g\n",power->rs_bitline);
1415
  fprintf(stderr,"Load/Store Queue Power Consumption: %g  (%.3g%%)\n",lsq_power,100*lsq_power/total_power);
1416
  fprintf(stderr," tagdrive (W): %g\n",power->lsq_wakeup_tagdrive);
1417
  fprintf(stderr," tagmatch (W): %g\n",power->lsq_wakeup_tagmatch);
1418
  fprintf(stderr," decode_power (W): %g\n",power->lsq_rs_decoder);
1419
  fprintf(stderr," wordline_power (W): %g\n",power->lsq_rs_wordline);
1420
  fprintf(stderr," bitline_power (W): %g\n",power->lsq_rs_bitline);
1421
  fprintf(stderr,"Arch. Register File Power Consumption: %g  (%.3g%%)\n",regfile_power,100*regfile_power/total_power);
1422
  fprintf(stderr," decode_power (W): %g\n",power->regfile_decoder);
1423
  fprintf(stderr," wordline_power (W): %g\n",power->regfile_wordline);
1424
  fprintf(stderr," bitline_power (W): %g\n",power->regfile_bitline);
1425
  fprintf(stderr,"Result Bus Power Consumption: %g  (%.3g%%)\n",power->resultbus,100*power->resultbus/total_power);
1426
  fprintf(stderr,"Total Clock Power: %g  (%.3g%%)\n",power->clock_power,100*power->clock_power/total_power);
1427
  fprintf(stderr,"Int ALU Power: %g  (%.3g%%)\n",power->ialu_power,100*power->ialu_power/total_power);
1428
  fprintf(stderr,"FP ALU Power: %g  (%.3g%%)\n",power->falu_power,100*power->falu_power/total_power);
1429
  fprintf(stderr,"Instruction Cache Power Consumption: %g  (%.3g%%)\n",icache_power,100*icache_power/total_power);
1430
  fprintf(stderr," decode_power (W): %g\n",power->icache_decoder);
1431
  fprintf(stderr," wordline_power (W): %g\n",power->icache_wordline);
1432
  fprintf(stderr," bitline_power (W): %g\n",power->icache_bitline);
1433
  fprintf(stderr," senseamp_power (W): %g\n",power->icache_senseamp);
1434
  fprintf(stderr," tagarray_power (W): %g\n",power->icache_tagarray);
1435
  fprintf(stderr,"Itlb_power (W): %g (%.3g%%)\n",power->itlb,100*power->itlb/total_power);
1436
  fprintf(stderr,"Data Cache Power Consumption: %g  (%.3g%%)\n",dcache_power,100*dcache_power/total_power);
1437
  fprintf(stderr," decode_power (W): %g\n",power->dcache_decoder);
1438
  fprintf(stderr," wordline_power (W): %g\n",power->dcache_wordline);
1439
  fprintf(stderr," bitline_power (W): %g\n",power->dcache_bitline);
1440
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache_senseamp);
1441
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache_tagarray);
1442
  fprintf(stderr,"Dtlb_power (W): %g (%.3g%%)\n",power->dtlb,100*power->dtlb/total_power);
1443
  fprintf(stderr,"Level 2 Cache Power Consumption: %g (%.3g%%)\n",dcache2_power,100*dcache2_power/total_power);
1444
  fprintf(stderr," decode_power (W): %g\n",power->dcache2_decoder);
1445
  fprintf(stderr," wordline_power (W): %g\n",power->dcache2_wordline);
1446
  fprintf(stderr," bitline_power (W): %g\n",power->dcache2_bitline);
1447
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache2_senseamp);
1448
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache2_tagarray);
1449
}
1450

    
1451
/*======================================================================*/
1452

    
1453

    
1454

    
1455
/* 
1456
 * This part of the code contains routines for each section as
1457
 * described in the tech report.  See the tech report for more details
1458
 * and explanations */
1459

    
1460
/*----------------------------------------------------------------------*/
1461

    
1462
double driver_size(double driving_cap, double desiredrisetime) {
1463
  double nsize, psize;
1464
  double Rpdrive; 
1465

    
1466
  Rpdrive = desiredrisetime/(driving_cap*log(VSINV)*-1.0);
1467
  psize = restowidth(Rpdrive,PCH);
1468
  nsize = restowidth(Rpdrive,NCH);
1469
  if (psize > Wworddrivemax) {
1470
    psize = Wworddrivemax;
1471
  }
1472
  if (psize < 4.0 * LSCALE)
1473
    psize = 4.0 * LSCALE;
1474

    
1475
  return (psize);
1476

    
1477
}
1478

    
1479
/* Decoder delay:  (see section 6.1 of tech report) */
1480

    
1481
double array_decoder_power(rows,cols,predeclength,rports,wports,cache)
1482
     int rows,cols;
1483
     double predeclength;
1484
     int rports,wports;
1485
     int cache;
1486
{
1487
  double Ctotal=0;
1488
  double Ceq=0;
1489
  int numstack;
1490
  int decode_bits=0;
1491
  int ports;
1492
  double rowsb;
1493

    
1494
  /* read and write ports are the same here */
1495
  ports = rports + wports;
1496

    
1497
  rowsb = (double)rows;
1498

    
1499
  /* number of input bits to be decoded */
1500
  decode_bits=ceil((logtwo(rowsb)));
1501

    
1502
  /* First stage: driving the decoders */
1503

    
1504
  /* This is the capacitance for driving one bit (and its complement).
1505
     -There are #rowsb 3->8 decoders contributing gatecap.
1506
     - 2.0 factor from 2 identical sets of drivers in parallel
1507
  */
1508
  Ceq = 2.0*(draincap(Wdecdrivep,PCH,1)+draincap(Wdecdriven,NCH,1)) +
1509
    gatecap(Wdec3to8n+Wdec3to8p,10.0)*rowsb;
1510

    
1511
  /* There are ports * #decode_bits total */
1512
  Ctotal+=ports*decode_bits*Ceq;
1513

    
1514
  if(verbose)
1515
    fprintf(stderr,"Decoder -- Driving decoders            == %g\n",.3*Ctotal*Powerfactor);
1516

    
1517
  /* second stage: driving a bunch of nor gates with a nand 
1518
     numstack is the size of the nor gates -- ie. a 7-128 decoder has
1519
     3-input NAND followed by 3-input NOR  */
1520

    
1521
  numstack = ceil((1.0/3.0)*logtwo(rows));
1522

    
1523
  if (numstack<=0) numstack = 1;
1524
  if (numstack>5) numstack = 5;
1525

    
1526
  /* There are #rowsb NOR gates being driven*/
1527
  Ceq = (3.0*draincap(Wdec3to8p,PCH,1) +draincap(Wdec3to8n,NCH,3) +
1528
         gatecap(WdecNORn+WdecNORp,((numstack*40)+20.0)))*rowsb;
1529

    
1530
  Ctotal+=ports*Ceq;
1531

    
1532
  if(verbose)
1533
    fprintf(stderr,"Decoder -- Driving nor w/ nand         == %g\n",.3*ports*Ceq*Powerfactor);
1534

    
1535
  /* Final stage: driving an inverter with the nor 
1536
     (inverter preceding wordline driver) -- wordline driver is in the next section*/
1537

    
1538
  Ceq = (gatecap(Wdecinvn+Wdecinvp,20.0)+
1539
         numstack*draincap(WdecNORn,NCH,1)+
1540
         draincap(WdecNORp,PCH,numstack));
1541

    
1542
  if(verbose)
1543
    fprintf(stderr,"Decoder -- Driving inverter w/ nor     == %g\n",.3*ports*Ceq*Powerfactor);
1544

    
1545
  Ctotal+=ports*Ceq;
1546

    
1547
  /* assume Activity Factor == .3  */
1548

    
1549
  return(.3*Ctotal*Powerfactor);
1550
}
1551

    
1552
/* Decoder power for a plain array: forwards to array_decoder_power()
   with a predecode length of 0.0.  All other arguments are passed
   through unchanged. */
double simple_array_decoder_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  const double no_predec=0.0;

  return(array_decoder_power(rows,cols,no_predec,rports,wports,cache));
}
1560

    
1561

    
1562
double array_wordline_power(rows,cols,wordlinelength,rports,wports,cache)
1563
     int rows,cols;
1564
     double wordlinelength;
1565
     int rports,wports;
1566
     int cache;
1567
{
1568
  double Ctotal=0;
1569
  double Ceq=0;
1570
  double Cline=0;
1571
  double Cliner, Clinew=0;
1572
  double desiredrisetime,psize,nsize;
1573
  int ports;
1574
  double colsb;
1575

    
1576
  ports = rports+wports;
1577

    
1578
  colsb = (double)cols;
1579

    
1580
  /* Calculate size of wordline drivers assuming rise time == Period / 8 
1581
     - estimate cap on line 
1582
     - compute min resistance to achieve this with RC 
1583
     - compute width needed to achieve this resistance */
1584

    
1585
  desiredrisetime = Period/16;
1586
  Cline = (gatecappass(Wmemcellr,1.0))*colsb + wordlinelength*CM3metal;
1587
  psize = driver_size(Cline,desiredrisetime);
1588
  
1589
  /* how do we want to do p-n ratioing? -- here we just assume the same ratio 
1590
     from an inverter pair  */
1591
  nsize = psize * Wdecinvn/Wdecinvp; 
1592
  
1593
  if(verbose)
1594
    fprintf(stderr,"Wordline Driver Sizes -- nsize == %f, psize == %f\n",nsize,psize);
1595

    
1596
  Ceq = draincap(Wdecinvn,NCH,1) + draincap(Wdecinvp,PCH,1) +
1597
    gatecap(nsize+psize,20.0);
1598

    
1599
  Ctotal+=ports*Ceq;
1600

    
1601
  if(verbose)
1602
    fprintf(stderr,"Wordline -- Inverter -> Driver         == %g\n",ports*Ceq*Powerfactor);
1603

    
1604
  /* Compute caps of read wordline and write wordlines 
1605
     - wordline driver caps, given computed width from above
1606
     - read wordlines have 1 nmos access tx, size ~4
1607
     - write wordlines have 2 nmos access tx, size ~2
1608
     - metal line cap
1609
  */
1610

    
1611
  Cliner = (gatecappass(Wmemcellr,(BitWidth-2*Wmemcellr)/2.0))*colsb+
1612
    wordlinelength*CM3metal+
1613
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1614
  Clinew = (2.0*gatecappass(Wmemcellw,(BitWidth-2*Wmemcellw)/2.0))*colsb+
1615
    wordlinelength*CM3metal+
1616
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1617

    
1618
  if(verbose) {
1619
    fprintf(stderr,"Wordline -- Line                       == %g\n",1e12*Cline);
1620
    fprintf(stderr,"Wordline -- Line -- access -- gatecap  == %g\n",1e12*colsb*2*gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0));
1621
    fprintf(stderr,"Wordline -- Line -- driver -- draincap == %g\n",1e12*draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1622
    fprintf(stderr,"Wordline -- Line -- metal              == %g\n",1e12*wordlinelength*CM3metal);
1623
  }
1624
  Ctotal+=rports*Cliner+wports*Clinew;
1625

    
1626
  /* AF == 1 assuming a different wordline is charged each cycle, but only
1627
     1 wordline (per port) is actually used */
1628

    
1629
  return(Ctotal*Powerfactor);
1630
}
1631

    
1632
double simple_array_wordline_power(rows,cols,rports,wports,cache)
1633
     int rows,cols;
1634
     int rports,wports;
1635
     int cache;
1636
{
1637
  double wordlinelength;
1638
  int ports = rports + wports;
1639
  wordlinelength = cols *  (RegCellWidth + 2 * ports * BitlineSpacing);
1640
  return(array_wordline_power(rows,cols,wordlinelength,rports,wports,cache));
1641
}
1642

    
1643

    
1644
double array_bitline_power(rows,cols,bitlinelength,rports,wports,cache)
1645
     int rows,cols;
1646
     double bitlinelength;
1647
     int rports,wports;
1648
     int cache;
1649
{
1650
  double Ctotal=0;
1651
  double Ccolmux=0;
1652
  double Cbitrowr=0;
1653
  double Cbitroww=0;
1654
  double Cprerow=0;
1655
  double Cwritebitdrive=0;
1656
  double Cpregate=0;
1657
  double Cliner=0;
1658
  double Clinew=0;
1659
  int ports;
1660
  double rowsb;
1661
  double colsb;
1662

    
1663
  double desiredrisetime, Cline, psize, nsize;
1664

    
1665
  ports = rports + wports;
1666

    
1667
  rowsb = (double)rows;
1668
  colsb = (double)cols;
1669

    
1670
  /* Draincaps of access tx's */
1671

    
1672
  Cbitrowr = draincap(Wmemcellr,NCH,1);
1673
  Cbitroww = draincap(Wmemcellw,NCH,1);
1674

    
1675
  /* Cprerow -- precharge cap on the bitline
1676
     -simple scheme to estimate size of pre-charge tx's in a similar fashion
1677
      to wordline driver size estimation.
1678
     -FIXME: it would be better to use precharge/keeper pairs, i've omitted this
1679
      from this version because it couldn't autosize as easily.
1680
  */
1681

    
1682
  desiredrisetime = Period/8;
1683

    
1684
  Cline = rowsb*Cbitrowr+CM2metal*bitlinelength;
1685
  psize = driver_size(Cline,desiredrisetime);
1686

    
1687
  /* compensate for not having an nmos pre-charging */
1688
  psize = psize + psize * Wdecinvn/Wdecinvp; 
1689

    
1690
  if(verbose)
1691
    printf("Cprerow auto   == %g (psize == %g)\n",draincap(psize,PCH,1),psize);
1692

    
1693
  Cprerow = draincap(psize,PCH,1);
1694

    
1695
  /* Cpregate -- cap due to gatecap of precharge transistors -- tack this
1696
     onto bitline cap, again this could have a keeper */
1697
  Cpregate = 4.0*gatecap(psize,10.0);
1698
  global_clockcap+=rports*cols*2.0*Cpregate;
1699

    
1700
  /* Cwritebitdrive -- write bitline drivers are used instead of the precharge
1701
     stuff for write bitlines
1702
     - 2 inverter drivers within each driver pair */
1703

    
1704
  Cline = rowsb*Cbitroww+CM2metal*bitlinelength;
1705

    
1706
  psize = driver_size(Cline,desiredrisetime);
1707
  nsize = psize * Wdecinvn/Wdecinvp; 
1708

    
1709
  Cwritebitdrive = 2.0*(draincap(psize,PCH,1)+draincap(nsize,NCH,1));
1710

    
1711
  /* 
1712
     reg files (cache==0) 
1713
     => single ended bitlines (1 bitline/col)
1714
     => AFs from pop_count
1715
     caches (cache ==1)
1716
     => double-ended bitlines (2 bitlines/col)
1717
     => AFs = .5 (since one of the two bitlines is always charging/discharging)
1718
  */
1719

    
1720
#ifdef STATIC_AF
1721
  if (cache == 0) {
1722
    /* compute the total line cap for read/write bitlines */
1723
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1724
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1725

    
1726
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1727
       in cache styles) */
1728
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1729
    Ctotal+=(1.0-POPCOUNT_AF)*rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1730
    Ctotal+=.3*wports*cols*(Clinew+Cwritebitdrive);
1731
  } 
1732
  else { 
1733
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1734
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1735
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1736
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1737
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1738
  }
1739
#else
1740
  if (cache == 0) {
1741
    /* compute the total line cap for read/write bitlines */
1742
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1743
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1744

    
1745
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1746
       in cache styles) */
1747
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1748
    Ctotal += rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1749
    Ctotal += .3*wports*cols*(Clinew+Cwritebitdrive);
1750
  } 
1751
  else { 
1752
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1753
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1754
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1755
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1756
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1757
  }
1758
#endif
1759

    
1760
  if(verbose) {
1761
    fprintf(stderr,"Bitline -- Precharge                   == %g\n",1e12*Cpregate);
1762
    fprintf(stderr,"Bitline -- Line                        == %g\n",1e12*(Cliner+Clinew));
1763
    fprintf(stderr,"Bitline -- Line -- access draincap     == %g\n",1e12*rowsb*Cbitrowr);
1764
    fprintf(stderr,"Bitline -- Line -- precharge draincap  == %g\n",1e12*Cprerow);
1765
    fprintf(stderr,"Bitline -- Line -- metal               == %g\n",1e12*bitlinelength*CM2metal);
1766
    fprintf(stderr,"Bitline -- Colmux                      == %g\n",1e12*Ccolmux);
1767

    
1768
    fprintf(stderr,"\n");
1769
  }
1770

    
1771

    
1772
  if(cache==0)
1773
    return(Ctotal*Powerfactor);
1774
  else
1775
    return(Ctotal*SensePowerfactor*.4);
1776
  
1777
}
1778

    
1779

    
1780
double simple_array_bitline_power(rows,cols,rports,wports,cache)
1781
     int rows,cols;
1782
     int rports,wports;
1783
     int cache;
1784
{
1785
  double bitlinelength;
1786

    
1787
  int ports = rports + wports;
1788

    
1789
  bitlinelength = rows * (RegCellHeight + ports * WordlineSpacing);
1790

    
1791
  return (array_bitline_power(rows,cols,bitlinelength,rports,wports,cache));
1792

    
1793
}
1794

    
1795
/* estimate senseamp power dissipation in cache structures (Zyuban's method) */
1796
double senseamp_power(int cols)
1797
{
1798
  return((double)cols * Vdd/8 * .5e-3);
1799
}
1800

    
1801
/* estimate comparator power consumption (this comparator is similar
1802
   to the tag-match structure in a CAM */
1803
double compare_cap(int compare_bits)
1804
{
1805
  double c1, c2;
1806
  /* bottom part of comparator */
1807
  c2 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2))+
1808
    draincap(Wevalinvp,PCH,1) + draincap(Wevalinvn,NCH,1);
1809

    
1810
  /* top part of comparator */
1811
  c1 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2)+
1812
                       draincap(Wcomppreequ,NCH,1)) +
1813
    gatecap(WdecNORn,1.0)+
1814
    gatecap(WdecNORp,3.0);
1815

    
1816
  return(c1 + c2);
1817
}
1818

    
1819
/* power of depency check logic */
1820
double dcl_compare_power(int compare_bits)
1821
{
1822
  double Ctotal;
1823
  int num_comparators;
1824
  
1825
  num_comparators = (ruu_decode_width - 1) * (ruu_decode_width);
1826

    
1827
  Ctotal = num_comparators * compare_cap(compare_bits);
1828

    
1829
  return(Ctotal*Powerfactor*AF);
1830
}
1831

    
1832
/* Total power of a plain array: decoder + wordline + bitline, plus the
   sense amplifiers when the array is cache style (cache != 0). */
double simple_array_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  double array_sum;

  array_sum = simple_array_decoder_power(rows,cols,rports,wports,cache)+
    simple_array_wordline_power(rows,cols,rports,wports,cache)+
    simple_array_bitline_power(rows,cols,rports,wports,cache);

  /* only cache-style arrays have sense amps */
  if(cache!=0)
    array_sum = array_sum + senseamp_power(cols);

  return(array_sum);
}
1847

    
1848

    
1849
double cam_tagdrive(rows,cols,rports,wports)
1850
     int rows,cols,rports,wports;
1851
{
1852
  double Ctotal, Ctlcap, Cblcap, Cwlcap;
1853
  double taglinelength;
1854
  double wordlinelength;
1855
  double nsize, psize;
1856
  int ports;
1857
  Ctotal=0;
1858

    
1859
  ports = rports + wports;
1860

    
1861
  taglinelength = rows * 
1862
    (CamCellHeight + ports * MatchlineSpacing);
1863

    
1864
  wordlinelength = cols * 
1865
    (CamCellWidth + ports * TaglineSpacing);
1866

    
1867
  /* Compute tagline cap */
1868
  Ctlcap = Cmetal * taglinelength + 
1869
    rows * gatecappass(Wcomparen2,2.0) +
1870
    draincap(Wcompdrivern,NCH,1)+draincap(Wcompdriverp,PCH,1);
1871

    
1872
  /* Compute bitline cap (for writing new tags) */
1873
  Cblcap = Cmetal * taglinelength +
1874
    rows * draincap(Wmemcellr,NCH,2);
1875

    
1876
  /* autosize wordline driver */
1877
  psize = driver_size(Cmetal * wordlinelength + 2 * cols * gatecap(Wmemcellr,2.0),Period/8);
1878
  nsize = psize * Wdecinvn/Wdecinvp; 
1879

    
1880
  /* Compute wordline cap (for writing new tags) */
1881
  Cwlcap = Cmetal * wordlinelength + 
1882
    draincap(nsize,NCH,1)+draincap(psize,PCH,1) +
1883
    2 * cols * gatecap(Wmemcellr,2.0);
1884
    
1885
  Ctotal += (rports * cols * 2 * Ctlcap) + 
1886
    (wports * ((cols * 2 * Cblcap) + (rows * Cwlcap)));
1887

    
1888
  return(Ctotal*Powerfactor*AF);
1889
}
1890

    
1891
double cam_tagmatch(rows,cols,rports,wports)
1892
     int rows,cols,rports,wports;
1893
{
1894
  double Ctotal, Cmlcap;
1895
  double matchlinelength;
1896
  int ports;
1897
  Ctotal=0;
1898

    
1899
  ports = rports + wports;
1900

    
1901
  matchlinelength = cols * 
1902
    (CamCellWidth + ports * TaglineSpacing);
1903

    
1904
  Cmlcap = 2 * cols * draincap(Wcomparen1,NCH,2) + 
1905
    Cmetal * matchlinelength + draincap(Wmatchpchg,NCH,1) +
1906
    gatecap(Wmatchinvn+Wmatchinvp,10.0) +
1907
    gatecap(Wmatchnandn+Wmatchnandp,10.0);
1908

    
1909
  Ctotal += rports * rows * Cmlcap;
1910

    
1911
  global_clockcap += rports * rows * gatecap(Wmatchpchg,5.0);
1912
  
1913
  /* noring the nanded match lines */
1914
  if(ruu_issue_width >= 8)
1915
    Ctotal += 2 * gatecap(Wmatchnorn+Wmatchnorp,10.0);
1916

    
1917
  return(Ctotal*Powerfactor*AF);
1918
}
1919

    
1920
/* Total CAM power: tag drive plus tag match. */
double cam_array(rows,cols,rports,wports)
     int rows,cols,rports,wports;
{
  double drive_power = cam_tagdrive(rows,cols,rports,wports);
  double match_power = cam_tagmatch(rows,cols,rports,wports);

  return(drive_power + match_power);
}
1926

    
1927

    
1928
double selection_power(int win_entries)
1929
{
1930
  double Ctotal, Cor, Cpencode;
1931
  int num_arbiter=1;
1932

    
1933
  Ctotal=0;
1934

    
1935
  while(win_entries > 4)
1936
    {
1937
      win_entries = (int)ceil((double)win_entries / 4.0);
1938
      num_arbiter += win_entries;
1939
    }
1940

    
1941
  Cor = 4 * draincap(WSelORn,NCH,1) + draincap(WSelORprequ,PCH,1);
1942

    
1943
  Cpencode = draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,1) + 
1944
    2*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,2) + 
1945
    3*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,3) + 
1946
    4*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,4) + 
1947
    4*gatecap(WSelEnn+WSelEnp,20.0) + 
1948
    4*draincap(WSelEnn,NCH,1) + 4*draincap(WSelEnp,PCH,1);
1949

    
1950
  Ctotal += ruu_issue_width * num_arbiter*(Cor+Cpencode);
1951

    
1952
  return(Ctotal*Powerfactor*AF);
1953
}
1954

    
1955
/* very rough clock power estimates */
1956
double total_clockpower(double die_length)
1957
{
1958

    
1959
  double clocklinelength;
1960
  double Cline,Cline2,Ctotal;
1961
  double pipereg_clockcap=0;
1962
  double global_buffercap = 0;
1963
  double Clockpower;
1964

    
1965
  double num_piperegs;
1966

    
1967
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
1968

    
1969
  /* Assume say 8 stages (kinda low now).
1970
     FIXME: this could be a lot better; user could input
1971
     number of pipestages, etc  */
1972

    
1973
  /* assume 8 pipe stages and try to estimate bits per pipe stage */
1974
  /* pipe stage 0/1 */
1975
  num_piperegs = ruu_issue_width*inst_length + data_width;
1976
  /* pipe stage 1/2 */
1977
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1978
  /* pipe stage 2/3 */
1979
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1980
  /* pipe stage 3/4 */
1981
  num_piperegs += ruu_issue_width*(3 * npreg_width + pow2(opcode_length));
1982
  /* pipe stage 4/5 */
1983
  num_piperegs += ruu_issue_width*(2*data_width + pow2(opcode_length));
1984
  /* pipe stage 5/6 */
1985
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1986
  /* pipe stage 6/7 */
1987
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1988
  /* pipe stage 7/8 */
1989
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1990

    
1991
  /* assume 50% extra in control signals (rule of thumb) */
1992
  num_piperegs = num_piperegs * 1.5;
1993

    
1994
  pipereg_clockcap = num_piperegs * 4*gatecap(10.0,0);
1995

    
1996
  /* estimate based on 3% of die being in clock metal */
1997
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
1998

    
1999
  /* another estimate */
2000
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
2001
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
2002
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
2003
  /* global_clockcap is computed within each array structure for pre-charge tx's*/
2004
  Ctotal = Cline+global_clockcap+pipereg_clockcap+global_buffercap;
2005

    
2006
  if(verbose)
2007
    fprintf(stderr,"num_piperegs == %f\n",num_piperegs);
2008

    
2009
  /* add I_ADD Clockcap and F_ADD Clockcap */
2010
  Clockpower = Ctotal*Powerfactor + res_ialu*I_ADD_CLOCK + res_fpalu*F_ADD_CLOCK;
2011

    
2012
  if(verbose) {
2013
    fprintf(stderr,"Global Clock Power: %g\n",Clockpower);
2014
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
2015
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
2016
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
2017
    fprintf(stderr," Global Clock Cap (Explicit) (W): %g\n",global_clockcap*Powerfactor+I_ADD_CLOCK+F_ADD_CLOCK);
2018
    fprintf(stderr," Global Clock Cap (Implicit) (W): %g\n",pipereg_clockcap*Powerfactor);
2019
  }
2020
  return(Clockpower);
2021

    
2022
}
2023

    
2024
/* very rough global clock power estimates */
2025
double global_clockpower(double die_length)
2026
{
2027

    
2028
  double clocklinelength;
2029
  double Cline,Cline2,Ctotal;
2030
  double global_buffercap = 0;
2031

    
2032
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
2033

    
2034
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
2035
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
2036
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
2037
  Ctotal = Cline+global_buffercap;
2038

    
2039
  if(verbose) {
2040
    fprintf(stderr,"Global Clock Power: %g\n",Ctotal*Powerfactor);
2041
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
2042
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
2043
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
2044
  }
2045

    
2046
  return(Ctotal*Powerfactor);
2047

    
2048
}
2049

    
2050

    
2051
double compute_resultbus_power()
2052
{
2053
  double Ctotal, Cline;
2054

    
2055
  double regfile_height;
2056

    
2057
  /* compute size of result bus tags */
2058
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2059

    
2060
  Ctotal=0;
2061

    
2062
  regfile_height = RUU_size * (RegCellHeight + 
2063
                               WordlineSpacing * 3 * ruu_issue_width); 
2064

    
2065
  /* assume num alu's == ialu  (FIXME: generate a more detailed result bus network model*/
2066
  Cline = Cmetal * (regfile_height + .5 * res_ialu * 3200.0 * LSCALE);
2067

    
2068
  /* or use result bus length measured from 21264 die photo */
2069
  /*  Cline = Cmetal * 3.3*1000;*/
2070

    
2071
  /* Assume ruu_issue_width result busses -- power can be scaled linearly
2072
     for number of result busses (scale by writeback_access) */
2073
  Ctotal += 2.0 * (data_width + npreg_width) * (ruu_issue_width)* Cline;
2074

    
2075
#ifdef STATIC_AF
2076
  return(Ctotal*Powerfactor*AF);
2077
#else
2078
  return(Ctotal*Powerfactor);
2079
#endif
2080
  
2081
}
2082

    
2083
void calculate_power(power)
2084
     power_result_type *power;
2085
{
2086
  double clockpower;
2087
  double predeclength, wordlinelength, bitlinelength;
2088
  int ndwl, ndbl, nspd, ntwl, ntbl, ntspd, c,b,a,cache, rowsb, colsb;
2089
  int trowsb, tcolsb, tagsize;
2090
  int va_size = 48;
2091

    
2092
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2093

    
2094
  /* these variables are needed to use Cacti to auto-size cache arrays 
2095
     (for optimal delay) */
2096
  time_result_type time_result;
2097
  time_parameter_type time_parameters;
2098

    
2099
  /* used to autosize other structures, like bpred tables */
2100
  int scale_factor;
2101

    
2102
  global_clockcap = 0;
2103

    
2104
  cache=0;
2105

    
2106

    
2107
  /* FIXME: ALU power is a simple constant, it would be better
2108
     to include bit AFs and have different numbers for different
2109
     types of operations */
2110
  power->ialu_power = res_ialu * I_ADD;
2111
  power->falu_power = res_fpalu * F_ADD;
2112

    
2113
  nvreg_width = (int)ceil(logtwo((double)MD_NUM_IREGS));
2114
  npreg_width = (int)ceil(logtwo((double)RUU_size));
2115

    
2116

    
2117
  /* RAT has shadow bits stored in each cell, this makes the
2118
     cell size larger than normal array structures, so we must
2119
     compute it here */
2120

    
2121
  predeclength = MD_NUM_IREGS * 
2122
    (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2123

    
2124
  wordlinelength = npreg_width * 
2125
    (RatCellWidth + 
2126
     6 * ruu_decode_width * BitlineSpacing + 
2127
     RatShiftRegWidth*RatNumShift);
2128

    
2129
  bitlinelength = MD_NUM_IREGS * (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2130

    
2131
  if(verbose)
2132
    fprintf(stderr,"rat power stats\n");
2133
  power->rat_decoder = array_decoder_power(MD_NUM_IREGS,npreg_width,predeclength,2*ruu_decode_width,ruu_decode_width,cache);
2134
  power->rat_wordline = array_wordline_power(MD_NUM_IREGS,npreg_width,wordlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2135
  power->rat_bitline = array_bitline_power(MD_NUM_IREGS,npreg_width,bitlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2136
  power->rat_senseamp = 0;
2137

    
2138
  power->dcl_compare = dcl_compare_power(nvreg_width);
2139
  power->dcl_pencode = 0;
2140
  power->inst_decoder_power = ruu_decode_width * simple_array_decoder_power(opcode_length,1,1,1,cache);
2141
  power->wakeup_tagdrive =cam_tagdrive(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2142
  power->wakeup_tagmatch =cam_tagmatch(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2143
  power->wakeup_ormatch =0; 
2144

    
2145
  power->selection = selection_power(RUU_size);
2146

    
2147

    
2148
  predeclength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2149

    
2150
  wordlinelength = data_width * 
2151
    (RegCellWidth + 
2152
     6 * ruu_issue_width * BitlineSpacing);
2153

    
2154
  bitlinelength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2155

    
2156
  if(verbose)
2157
    fprintf(stderr,"regfile power stats\n");
2158

    
2159
  power->regfile_decoder = array_decoder_power(MD_NUM_IREGS,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2160
  power->regfile_wordline = array_wordline_power(MD_NUM_IREGS,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2161
  power->regfile_bitline = array_bitline_power(MD_NUM_IREGS,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2162
  power->regfile_senseamp =0;
2163

    
2164
  predeclength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2165

    
2166
  wordlinelength = data_width * 
2167
    (RegCellWidth + 
2168
     6 * ruu_issue_width * BitlineSpacing);
2169

    
2170
  bitlinelength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2171

    
2172
  if(verbose)
2173
    fprintf(stderr,"res station power stats\n");
2174
  power->rs_decoder = array_decoder_power(RUU_size,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2175
  power->rs_wordline = array_wordline_power(RUU_size,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2176
  power->rs_bitline = array_bitline_power(RUU_size,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2177
  /* no senseamps in reg file structures (only caches) */
2178
  power->rs_senseamp =0;
2179

    
2180
  /* addresses go into lsq tag's */
2181
  power->lsq_wakeup_tagdrive =cam_tagdrive(LSQ_size,data_width,res_memport,res_memport);
2182
  power->lsq_wakeup_tagmatch =cam_tagmatch(LSQ_size,data_width,res_memport,res_memport);
2183
  power->lsq_wakeup_ormatch =0; 
2184

    
2185
  wordlinelength = data_width * 
2186
    (RegCellWidth + 
2187
     4 * res_memport * BitlineSpacing);
2188

    
2189
  bitlinelength = RUU_size * (RegCellHeight + 4 * res_memport * WordlineSpacing);
2190

    
2191
  /* rs's hold data */
2192
  if(verbose)
2193
    fprintf(stderr,"lsq station power stats\n");
2194
  power->lsq_rs_decoder = array_decoder_power(LSQ_size,data_width,predeclength,res_memport,res_memport,cache);
2195
  power->lsq_rs_wordline = array_wordline_power(LSQ_size,data_width,wordlinelength,res_memport,res_memport,cache);
2196
  power->lsq_rs_bitline = array_bitline_power(LSQ_size,data_width,bitlinelength,res_memport,res_memport,cache);
2197
  power->lsq_rs_senseamp =0;
2198

    
2199
  power->resultbus = compute_resultbus_power();
2200

    
2201
  /* Load cache values into what cacti is expecting */
2202
  time_parameters.cache_size = btb_config[0] * (data_width/8) * btb_config[1]; /* C */
2203
  time_parameters.block_size = (data_width/8); /* B */
2204
  time_parameters.associativity = btb_config[1]; /* A */
2205
  time_parameters.number_of_sets = btb_config[0]; /* C/(B*A) */
2206

    
2207
  /* have Cacti compute optimal cache config */
2208
  calculate_time(&time_result,&time_parameters);
2209
  output_data(&time_result,&time_parameters);
2210

    
2211
  /* extract Cacti results */
2212
  ndwl=time_result.best_Ndwl;
2213
  ndbl=time_result.best_Ndbl;
2214
  nspd=time_result.best_Nspd;
2215
  ntwl=time_result.best_Ntwl;
2216
  ntbl=time_result.best_Ntbl;
2217
  ntspd=time_result.best_Ntspd;
2218
  c = time_parameters.cache_size;
2219
  b = time_parameters.block_size;
2220
  a = time_parameters.associativity; 
2221

    
2222
  cache=1;
2223

    
2224
  /* Figure out how many rows/cols there are now */
2225
  rowsb = c/(b*a*ndbl*nspd);
2226
  colsb = 8*b*a*nspd/ndwl;
2227

    
2228
  if(verbose) {
2229
    fprintf(stderr,"%d KB %d-way btb (%d-byte block size):\n",c,a,b);
2230
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2231
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2232
  }
2233

    
2234
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2235
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2236
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2237

    
2238
  if(verbose)
2239
    fprintf(stderr,"btb power stats\n");
2240
  power->btb = ndwl*ndbl*(array_decoder_power(rowsb,colsb,predeclength,1,1,cache) + array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache) + array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache) + senseamp_power(colsb));
2241

    
2242
  cache=1;
2243

    
2244
  scale_factor = squarify(twolev_config[0],twolev_config[2]);
2245
  predeclength = (twolev_config[0] / scale_factor)* (RegCellHeight + WordlineSpacing);
2246
  wordlinelength = twolev_config[2] * scale_factor *  (RegCellWidth + BitlineSpacing);
2247
  bitlinelength = (twolev_config[0] / scale_factor) * (RegCellHeight + WordlineSpacing);
2248

    
2249
  if(verbose)
2250
    fprintf(stderr,"local predict power stats\n");
2251

    
2252
  power->local_predict = array_decoder_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,bitlinelength,1,1,cache) + senseamp_power(twolev_config[2]*scale_factor);
2253

    
2254
  scale_factor = squarify(twolev_config[1],3);
2255

    
2256
  predeclength = (twolev_config[1] / scale_factor)* (RegCellHeight + WordlineSpacing);
2257
  wordlinelength = 3 * scale_factor *  (RegCellWidth + BitlineSpacing);
2258
  bitlinelength = (twolev_config[1] / scale_factor) * (RegCellHeight + WordlineSpacing);
2259

    
2260

    
2261
  if(verbose)
2262
    fprintf(stderr,"local predict power stats\n");
2263
  power->local_predict += array_decoder_power(twolev_config[1]/scale_factor,3*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[1]/scale_factor,3*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[1]/scale_factor,3*scale_factor,bitlinelength,1,1,cache) + senseamp_power(3*scale_factor);
2264

    
2265
  if(verbose)
2266
    fprintf(stderr,"bimod_config[0] == %d\n",bimod_config[0]);
2267

    
2268
  scale_factor = squarify(bimod_config[0],2);
2269

    
2270
  predeclength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2271
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2272
  bitlinelength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2273

    
2274

    
2275
  if(verbose)
2276
    fprintf(stderr,"global predict power stats\n");
2277
  power->global_predict = array_decoder_power(bimod_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(bimod_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(bimod_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2278

    
2279
  scale_factor = squarify(comb_config[0],2);
2280

    
2281
  predeclength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2282
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2283
  bitlinelength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2284

    
2285
  if(verbose)
2286
    fprintf(stderr,"chooser predict power stats\n");
2287
  power->chooser = array_decoder_power(comb_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(comb_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(comb_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2288

    
2289
  if(verbose)
2290
    fprintf(stderr,"RAS predict power stats\n");
2291
  power->ras = simple_array_power(ras_size,data_width,1,1,0);
2292

    
2293
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2294

    
2295
  if(verbose)
2296
    fprintf(stderr,"dtlb predict power stats\n");
2297
  power->dtlb = res_memport*(cam_array(dtlb->nsets, va_size - (int)logtwo((double)dtlb->bsize),1,1) + simple_array_power(dtlb->nsets,tagsize,1,1,cache));
2298

    
2299
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2300

    
2301
  predeclength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2302
  wordlinelength = logtwo((double)itlb->bsize) * (RegCellWidth + BitlineSpacing);
2303
  bitlinelength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2304

    
2305
  if(verbose)
2306
    fprintf(stderr,"itlb predict power stats\n");
2307
  power->itlb = cam_array(itlb->nsets, va_size - (int)logtwo((double)itlb->bsize),1,1) + simple_array_power(itlb->nsets,tagsize,1,1,cache);
2308

    
2309

    
2310
  cache=1;
2311

    
2312
  time_parameters.cache_size = cache_il1->nsets * cache_il1->bsize * cache_il1->assoc; /* C */
2313
  time_parameters.block_size = cache_il1->bsize; /* B */
2314
  time_parameters.associativity = cache_il1->assoc; /* A */
2315
  time_parameters.number_of_sets = cache_il1->nsets; /* C/(B*A) */
2316

    
2317
  calculate_time(&time_result,&time_parameters);
2318
  output_data(&time_result,&time_parameters);
2319

    
2320
  ndwl=time_result.best_Ndwl;
2321
  ndbl=time_result.best_Ndbl;
2322
  nspd=time_result.best_Nspd;
2323
  ntwl=time_result.best_Ntwl;
2324
  ntbl=time_result.best_Ntbl;
2325
  ntspd=time_result.best_Ntspd;
2326

    
2327
  c = time_parameters.cache_size;
2328
  b = time_parameters.block_size;
2329
  a = time_parameters.associativity;
2330

    
2331
  rowsb = c/(b*a*ndbl*nspd);
2332
  colsb = 8*b*a*nspd/ndwl;
2333

    
2334
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2335
  trowsb = c/(b*a*ntbl*ntspd);
2336
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2337
 
2338
  if(verbose) {
2339
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2340
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2341
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2342
    fprintf(stderr,"tagsize == %d\n",tagsize);
2343
  }
2344

    
2345
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2346
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2347
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2348

    
2349
  if(verbose)
2350
    fprintf(stderr,"icache power stats\n");
2351
  power->icache_decoder = ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2352
  power->icache_wordline = ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2353
  power->icache_bitline = ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2354
  power->icache_senseamp = ndwl*ndbl*senseamp_power(colsb);
2355
  power->icache_tagarray = ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2356

    
2357
  power->icache_power = power->icache_decoder + power->icache_wordline + power->icache_bitline + power->icache_senseamp + power->icache_tagarray;
2358

    
2359
  time_parameters.cache_size = cache_dl1->nsets * cache_dl1->bsize * cache_dl1->assoc; /* C */
2360
  time_parameters.block_size = cache_dl1->bsize; /* B */
2361
  time_parameters.associativity = cache_dl1->assoc; /* A */
2362
  time_parameters.number_of_sets = cache_dl1->nsets; /* C/(B*A) */
2363

    
2364
  calculate_time(&time_result,&time_parameters);
2365
  output_data(&time_result,&time_parameters);
2366

    
2367
  ndwl=time_result.best_Ndwl;
2368
  ndbl=time_result.best_Ndbl;
2369
  nspd=time_result.best_Nspd;
2370
  ntwl=time_result.best_Ntwl;
2371
  ntbl=time_result.best_Ntbl;
2372
  ntspd=time_result.best_Ntspd;
2373
  c = time_parameters.cache_size;
2374
  b = time_parameters.block_size;
2375
  a = time_parameters.associativity; 
2376

    
2377
  cache=1;
2378

    
2379
  rowsb = c/(b*a*ndbl*nspd);
2380
  colsb = 8*b*a*nspd/ndwl;
2381

    
2382
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2383
  trowsb = c/(b*a*ntbl*ntspd);
2384
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2385

    
2386
  if(verbose) {
2387
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2388
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2389
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2390
    fprintf(stderr,"tagsize == %d\n",tagsize);
2391

    
2392
    fprintf(stderr,"\nntwl == %d, ntbl == %d, ntspd == %d\n",ntwl,ntbl,ntspd);
2393
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ntwl*ntbl,trowsb,tcolsb);
2394
  }
2395

    
2396
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2397
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2398
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2399

    
2400
  if(verbose)
2401
    fprintf(stderr,"dcache power stats\n");
2402
  power->dcache_decoder = res_memport*ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2403
  power->dcache_wordline = res_memport*ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2404
  power->dcache_bitline = res_memport*ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2405
  power->dcache_senseamp = res_memport*ndwl*ndbl*senseamp_power(colsb);
2406
  power->dcache_tagarray = res_memport*ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2407

    
2408
  power->dcache_power = power->dcache_decoder + power->dcache_wordline + power->dcache_bitline + power->dcache_senseamp + power->dcache_tagarray;
2409

    
2410
  clockpower = total_clockpower(.018);
2411
  power->clock_power = clockpower;
2412
  if(verbose) {
2413
    fprintf(stderr,"result bus power == %f\n",power->resultbus);
2414
    fprintf(stderr,"global clock power == %f\n",clockpower);
2415
  }
2416

    
2417
  time_parameters.cache_size = cache_dl2->nsets * cache_dl2->bsize * cache_dl2->assoc; /* C */
2418
  time_parameters.block_size = cache_dl2->bsize; /* B */
2419
  time_parameters.associativity = cache_dl2->assoc; /* A */
2420
  time_parameters.number_of_sets = cache_dl2->nsets; /* C/(B*A) */
2421

    
2422
  calculate_time(&time_result,&time_parameters);
2423
  output_data(&time_result,&time_parameters);
2424

    
2425
  ndwl=time_result.best_Ndwl;
2426
  ndbl=time_result.best_Ndbl;
2427
  nspd=time_result.best_Nspd;
2428
  ntwl=time_result.best_Ntwl;
2429
  ntbl=time_result.best_Ntbl;
2430
  ntspd=time_result.best_Ntspd;
2431
  c = time_parameters.cache_size;
2432
  b = time_parameters.block_size;
2433
  a = time_parameters.associativity;
2434

    
2435
  rowsb = c/(b*a*ndbl*nspd);
2436
  colsb = 8*b*a*nspd/ndwl;
2437

    
2438
  tagsize = va_size - ((int)logtwo(cache_dl2->nsets) + (int)logtwo(cache_dl2->bsize));
2439
  trowsb = c/(b*a*ntbl*ntspd);
2440
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2441

    
2442
  if(verbose) {
2443
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2444
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2445
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2446
    fprintf(stderr,"tagsize == %d\n",tagsize);
2447
  }
2448

    
2449
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2450
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2451
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2452

    
2453
  if(verbose)
2454
    fprintf(stderr,"dcache2 power stats\n");
2455
  power->dcache2_decoder = array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2456
  power->dcache2_wordline = array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2457
  power->dcache2_bitline = array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2458
  power->dcache2_senseamp = senseamp_power(colsb);
2459
  power->dcache2_tagarray = simple_array_power(trowsb,tcolsb,1,1,cache);
2460

    
2461
  power->dcache2_power = power->dcache2_decoder + power->dcache2_wordline + power->dcache2_bitline + power->dcache2_senseamp + power->dcache2_tagarray;
2462

    
2463
  power->rat_decoder *= crossover_scaling;
2464
  power->rat_wordline *= crossover_scaling;
2465
  power->rat_bitline *= crossover_scaling;
2466

    
2467
  power->dcl_compare *= crossover_scaling;
2468
  power->dcl_pencode *= crossover_scaling;
2469
  power->inst_decoder_power *= crossover_scaling;
2470
  power->wakeup_tagdrive *= crossover_scaling;
2471
  power->wakeup_tagmatch *= crossover_scaling;
2472
  power->wakeup_ormatch *= crossover_scaling;
2473

    
2474
  power->selection *= crossover_scaling;
2475

    
2476
  power->regfile_decoder *= crossover_scaling;
2477
  power->regfile_wordline *= crossover_scaling;
2478
  power->regfile_bitline *= crossover_scaling;
2479
  power->regfile_senseamp *= crossover_scaling;
2480

    
2481
  power->rs_decoder *= crossover_scaling;
2482
  power->rs_wordline *= crossover_scaling;
2483
  power->rs_bitline *= crossover_scaling;
2484
  power->rs_senseamp *= crossover_scaling;
2485

    
2486
  power->lsq_wakeup_tagdrive *= crossover_scaling;
2487
  power->lsq_wakeup_tagmatch *= crossover_scaling;
2488

    
2489
  power->lsq_rs_decoder *= crossover_scaling;
2490
  power->lsq_rs_wordline *= crossover_scaling;
2491
  power->lsq_rs_bitline *= crossover_scaling;
2492
  power->lsq_rs_senseamp *= crossover_scaling;
2493
 
2494
  power->resultbus *= crossover_scaling;
2495

    
2496
  power->btb *= crossover_scaling;
2497
  power->local_predict *= crossover_scaling;
2498
  power->global_predict *= crossover_scaling;
2499
  power->chooser *= crossover_scaling;
2500

    
2501
  power->dtlb *= crossover_scaling;
2502

    
2503
  power->itlb *= crossover_scaling;
2504

    
2505
  power->icache_decoder *= crossover_scaling;
2506
  power->icache_wordline*= crossover_scaling;
2507
  power->icache_bitline *= crossover_scaling;
2508
  power->icache_senseamp*= crossover_scaling;
2509
  power->icache_tagarray*= crossover_scaling;
2510

    
2511
  power->icache_power *= crossover_scaling;
2512

    
2513
  power->dcache_decoder *= crossover_scaling;
2514
  power->dcache_wordline *= crossover_scaling;
2515
  power->dcache_bitline *= crossover_scaling;
2516
  power->dcache_senseamp *= crossover_scaling;
2517
  power->dcache_tagarray *= crossover_scaling;
2518

    
2519
  power->dcache_power *= crossover_scaling;
2520
  
2521
  power->clock_power *= crossover_scaling;
2522

    
2523
  power->dcache2_decoder *= crossover_scaling;
2524
  power->dcache2_wordline *= crossover_scaling;
2525
  power->dcache2_bitline *= crossover_scaling;
2526
  power->dcache2_senseamp *= crossover_scaling;
2527
  power->dcache2_tagarray *= crossover_scaling;
2528

    
2529
  power->dcache2_power *= crossover_scaling;
2530

    
2531
  power->total_power = power->local_predict + power->global_predict + 
2532
    power->chooser + power->btb +
2533
    power->rat_decoder + power->rat_wordline + 
2534
    power->rat_bitline + power->rat_senseamp + 
2535
    power->dcl_compare + power->dcl_pencode + 
2536
    power->inst_decoder_power +
2537
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2538
    power->selection +
2539
    power->regfile_decoder + power->regfile_wordline + 
2540
    power->regfile_bitline + power->regfile_senseamp +  
2541
    power->rs_decoder + power->rs_wordline +
2542
    power->rs_bitline + power->rs_senseamp + 
2543
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2544
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2545
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2546
    power->resultbus +
2547
    power->clock_power +
2548
    power->icache_power + 
2549
    power->itlb + 
2550
    power->dcache_power + 
2551
    power->dtlb + 
2552
    power->dcache2_power;
2553

    
2554
  power->total_power_nodcache2 =power->local_predict + power->global_predict + 
2555
    power->chooser + power->btb +
2556
    power->rat_decoder + power->rat_wordline + 
2557
    power->rat_bitline + power->rat_senseamp + 
2558
    power->dcl_compare + power->dcl_pencode + 
2559
    power->inst_decoder_power +
2560
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2561
    power->selection +
2562
    power->regfile_decoder + power->regfile_wordline + 
2563
    power->regfile_bitline + power->regfile_senseamp +  
2564
    power->rs_decoder + power->rs_wordline +
2565
    power->rs_bitline + power->rs_senseamp + 
2566
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2567
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2568
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2569
    power->resultbus +
2570
    power->clock_power +
2571
    power->icache_power + 
2572
    power->itlb + 
2573
    power->dcache_power + 
2574
    power->dtlb + 
2575
    power->dcache2_power;
2576

    
2577
  power->bpred_power = power->btb + power->local_predict + power->global_predict + power->chooser + power->ras;
2578

    
2579
  power->rat_power = power->rat_decoder + 
2580
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
2581

    
2582
  power->dcl_power = power->dcl_compare + power->dcl_pencode;
2583

    
2584
  power->rename_power = power->rat_power + 
2585
    power->dcl_power + 
2586
    power->inst_decoder_power;
2587

    
2588
  power->wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
2589
    power->wakeup_ormatch;
2590

    
2591
  power->rs_power = power->rs_decoder + 
2592
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
2593

    
2594
  power->rs_power_nobit = power->rs_decoder + 
2595
    power->rs_wordline + power->rs_senseamp;
2596

    
2597
  power->window_power = power->wakeup_power + power->rs_power + 
2598
    power->selection;
2599

    
2600
  power->lsq_rs_power = power->lsq_rs_decoder + 
2601
    power->lsq_rs_wordline + power->lsq_rs_bitline + 
2602
    power->lsq_rs_senseamp;
2603

    
2604
  power->lsq_rs_power_nobit = power->lsq_rs_decoder + 
2605
    power->lsq_rs_wordline + power->lsq_rs_senseamp;
2606
   
2607
  power->lsq_wakeup_power = power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch;
2608

    
2609
  power->lsq_power = power->lsq_wakeup_power + power->lsq_rs_power;
2610

    
2611
  power->regfile_power = power->regfile_decoder + 
2612
    power->regfile_wordline + power->regfile_bitline + 
2613
    power->regfile_senseamp;
2614

    
2615
  power->regfile_power_nobit = power->regfile_decoder + 
2616
    power->regfile_wordline + power->regfile_senseamp;
2617

    
2618
  dump_power_stats(power);
2619

    
2620
}