Statistics
| Revision:

root / wattch / power.c @ 55

History | View | Annotate | Download (103 KB)

1
/* I included this copyright since we're using Cacti for some stuff */
2

    
3
/*------------------------------------------------------------
4
 *  Copyright 1994 Digital Equipment Corporation and Steve Wilton
5
 *                         All Rights Reserved
6
 *
7
 * Permission to use, copy, and modify this software and its documentation is
8
 * hereby granted only under the following terms and conditions.  Both the
9
 * above copyright notice and this permission notice must appear in all copies
10
 * of the software, derivative works or modified versions, and any portions
11
 * thereof, and both notices must appear in supporting documentation.
12
 *
13
 * Users of this software agree to the terms and conditions set forth herein,
14
 * and hereby grant back to Digital a non-exclusive, unrestricted, royalty-
15
 * free right and license under any changes, enhancements or extensions
16
 * made to the core functions of the software, including but not limited to
17
 * those affording compatibility with other hardware or software
18
 * environments, but excluding applications which incorporate this software.
19
 * Users further agree to use their best efforts to return to Digital any
20
 * such changes, enhancements or extensions that they make and inform Digital
21
 * of noteworthy uses of this software.  Correspondence should be provided
22
 * to Digital at:
23
 *
24
 *                       Director of Licensing
25
 *                       Western Research Laboratory
26
 *                       Digital Equipment Corporation
27
 *                       100 Hamilton Avenue
28
 *                       Palo Alto, California  94301
29
 *
30
 * This software may be distributed (but not offered for sale or transferred
31
 * for compensation) to third parties, provided such third parties agree to
32
 * abide by the terms and conditions of this notice.
33
 *
34
 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
35
 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
36
 * OF MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
37
 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
38
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
39
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
41
 * SOFTWARE.
42
 *------------------------------------------------------------*/
43

    
44
#include <math.h>
45
#include "power.h"
46
#include "machine.h"
47
#include "cache.h"
48
#include "sim.h"
49
#include <assert.h>
50

    
51
//#define SensePowerfactor (Mhz)*(Vdd/2)*(Vdd/2)
52
//#define Sense2Powerfactor (Mhz)*(2*.3+.1*Vdd)
53
//#define Powerfactor (Mhz)*Vdd*Vdd
54
//#define LowSwingPowerfactor (Mhz)*.2*.2
55
/* set scale for crossover (vdd->gnd) currents */
56
double crossover_scaling = 1.2;
57
/* set non-ideal turnoff percentage */
58
double turnoff_factor = 0.1;
59

    
60
#define MSCALE (LSCALE * .624 / .2250)
61

    
62
/*----------------------------------------------------------------------*/
63

    
64
/* static power model results */
65
power_result_type power;
66

    
67
/*
 * Return 2 raised to the power x as an int.
 *
 * Computed with integer arithmetic only, so the result is exact and does
 * not depend on the accuracy of the math library's pow().
 *
 *   x < 0              -> 0 (2^x is a fraction, which truncates to 0)
 *   result fits in int -> exactly 2^x
 *   otherwise          -> saturates at the largest int value instead of
 *                         performing an out-of-range double-to-int
 *                         conversion (undefined behaviour)
 */
int pow2(int x) {
  /* largest int value; assumes int has no padding bits */
  const int int_max = (int)(~0u >> 1);
  int result = 1;

  if (x < 0)
    return 0;

  while (x-- > 0) {
    if (result > int_max / 2)
      return int_max;         /* doubling again would overflow */
    result *= 2;
  }
  return result;
}
70

    
71
double logfour(x)
72
     double x;
73
{
74
  if (x<=0) fprintf(stderr,"%e\n",x);
75
  return( (double) (log(x)/log(4.0)) );
76
}
77

    
78
/* safer pop count to validate the fast algorithm */
79
/*
 * Reference population count: examine the low bit, shift right, and repeat
 * until no set bits remain.  Returns the number of 1 bits in `bits`.
 */
int pop_count_slow(bquad_t bits)
{
  int ones = 0;
  bquad_t remaining;

  for (remaining = bits; remaining; remaining >>= 1) {
    if (remaining & 1)
      ones++;
  }

  return ones;
}
89

    
90
/* fast pop count */
91
/*
 * Parallel (divide-and-conquer) population count for a 64-bit value.
 *
 * Adjacent bit fields are summed pairwise, doubling the field width at
 * each step (1 -> 2 -> 4 -> 8 -> 16 -> 32 -> 64 bits).  The first two
 * steps mask before adding because a field's sum can need every bit of
 * the doubled field; from 4-bit fields onward the sum always fits, so
 * a single mask after the add suffices.
 *
 * The masks are written out as constants rather than generated by
 * function-local macros, so no helper macro names leak into the rest of
 * the file.
 *
 * Returns the number of 1 bits in `bits` (0..64).
 */
int pop_count(bquad_t bits)
{
  unsigned long long x = bits;

  x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL);
  x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
  x = (x + (x >> 4))  & 0x0F0F0F0F0F0F0F0FULL;
  x = (x + (x >> 8))  & 0x00FF00FF00FF00FFULL;
  x = (x + (x >> 16)) & 0x0000FFFF0000FFFFULL;
  x = (x + (x >> 32)) & 0x00000000FFFFFFFFULL;

  return (int)x;
}
107

    
108

    
109
int opcode_length = 8;
110
int inst_length = 32;
111

    
112
extern int ruu_decode_width;
113
extern int ruu_issue_width;
114
extern int ruu_commit_width;
115
extern int RUU_size;
116
extern int LSQ_size;
117
extern int data_width;
118
extern int res_ialu;
119
extern int res_fpalu;
120
extern int res_memport;
121

    
122
int nvreg_width;
123
int npreg_width;
124

    
125
extern int bimod_config[];
126

    
127
extern struct cache_t *cache_dl1;
128
extern struct cache_t *cache_il1;
129
extern struct cache_t *cache_dl2;
130

    
131
extern struct cache_t *dtlb;
132
extern struct cache_t *itlb;
133

    
134
/* 2-level predictor config (<l1size> <l2size> <hist_size> <xor>) */
135
extern int twolev_config[];
136

    
137
/* combining predictor config (<meta_table_size>) */
138
extern int comb_config[];
139

    
140
/* return address stack (RAS) size */
141
extern int ras_size;
142

    
143
/* BTB predictor config (<num_sets> <associativity>) */
144
extern int btb_config[];
145

    
146
double global_clockcap;
147

    
148
static double rename_power=0;
149
static double bpred_power=0;
150
static double window_power=0;
151
static double lsq_power=0;
152
static double regfile_power=0;
153
static double icache_power=0;
154
static double dcache_power=0;
155
static double dcache2_power=0;
156
static double alu_power=0;
157
static double falu_power=0;
158
static double resultbus_power=0;
159
static double clock_power=0;
160

    
161
static double rename_power_cc1=0;
162
static double bpred_power_cc1=0;
163
static double window_power_cc1=0;
164
static double lsq_power_cc1=0;
165
static double regfile_power_cc1=0;
166
static double icache_power_cc1=0;
167
static double dcache_power_cc1=0;
168
static double dcache2_power_cc1=0;
169
static double alu_power_cc1=0;
170
static double resultbus_power_cc1=0;
171
static double clock_power_cc1=0;
172

    
173
static double rename_power_cc2=0;
174
static double bpred_power_cc2=0;
175
static double window_power_cc2=0;
176
static double lsq_power_cc2=0;
177
static double regfile_power_cc2=0;
178
static double icache_power_cc2=0;
179
static double dcache_power_cc2=0;
180
static double dcache2_power_cc2=0;
181
static double alu_power_cc2=0;
182
static double resultbus_power_cc2=0;
183
static double clock_power_cc2=0;
184

    
185
static double rename_power_cc3=0;
186
static double bpred_power_cc3=0;
187
static double window_power_cc3=0;
188
static double lsq_power_cc3=0;
189
static double regfile_power_cc3=0;
190
static double icache_power_cc3=0;
191
static double dcache_power_cc3=0;
192
static double dcache2_power_cc3=0;
193
static double alu_power_cc3=0;
194
static double resultbus_power_cc3=0;
195
static double clock_power_cc3=0;
196

    
197
static double total_cycle_power;
198
static double total_cycle_power_cc1;
199
static double total_cycle_power_cc2;
200
static double total_cycle_power_cc3;
201

    
202
static double total_parasitic_cc1 = 0.0;
203
static double total_parasitic_cc2 = 0.0;
204
static double total_parasitic_cc3 = 0.0;
205
static double offchip_parasitic_cc1 = 0.0;
206
static double offchip_parasitic_cc2 = 0.0;
207
static double offchip_parasitic_cc3 = 0.0;
208
static double onchip_parasitic_cc1 = 0.0;
209
static double onchip_parasitic_cc2 = 0.0;
210
static double onchip_parasitic_cc3 = 0.0;
211
/* Resistance factor for the I^2*R term: update_power_stats() adds
   current^2 * PARASITIC_OHM to the off-chip parasitic totals each call. */
#define PARASITIC_OHM 0.002
212
/* Largest per-cycle current seen so far by update_power_stats(). */
static double max_amp = 0.00;
213
/* Smallest per-cycle current seen so far; starts high so that an early
   sample replaces it. */
static double min_amp = 1000.00;
214
/* Off-chip loss lookup table, indexed by the per-cycle current in bins
   0.5 wide: entry [0] is used for current < 0.5, [1] for < 1, ... [25]
   for < 13, and the final entry [26] for everything else.  The trailing
   comment on each row gives the upper bound (in amps) of that row's
   pair of bins. */
static double offchip_ploss[] = {0.5, 0.5, // 1 amp
215
                                 0.5, 0.5, // 2 amp
216
                                 0.5, 0.5, // 3 amp
217
                                 0.6, 0.7, // 4
218
                                 0.8, 0.9, // 5
219
                                 1.0, 1.1, // 6
220
                                 1.2, 1.3, // 7
221
                                 1.5, 1.6, // 8
222
                                 1.8, 2.0, // 9
223
                                 2.2, 2.4, // 10
224
                                 2.6, 2.8, // 11
225
                                 3.0, 3.3, // 12
226
                                 3.6, 3.9, 4.0}; // 13
227

    
228
static double last_single_total_cycle_power_cc1 = 0.0;
229
static double last_single_total_cycle_power_cc2 = 0.0;
230
static double last_single_total_cycle_power_cc3 = 0.0;
231
static double current_total_cycle_power_cc1;
232
static double current_total_cycle_power_cc2;
233
static double current_total_cycle_power_cc3;
234

    
235
/* State for the speed-grade (voltage/frequency) switching logic in
   update_power_stats(). */
/* sim_num_insn as sampled at the end of the previous update_power_stats() call */
static double last_sim_num_insn = 0;
236
/* sim_total_insn as sampled at the end of the previous update_power_stats() call */
static double last_sim_total_insn = 0;
237
/* change in sim_total_insn since the previous call */
static double diff_dispatch = 0;
238
/* change in sim_num_insn since the previous call */
static double diff_commit = 0;
239
/* active speed grade: 1 = full Mhz/Vdd, 0 = Mhz and Vdd halved */
static int speed_grade = 1;
240
/* speed grade in effect on the previous call; used to detect a switch */
static int last_speed_grade = 1;
241
/* running sum of diff_dispatch samples */
static double diff_dispatch_sum = 0;
242
/* running sum of diff_commit samples */
static double diff_commit_sum = 0;
243
/* number of warm-up calls seen; speed decisions start once this reaches SUM_OVER */
static int init_count = 0;
244
/* when enabled, update_power_stats() forces a return to speed grade 1 after
   SUM_OVER/3 cycles spent at grade 0 and restarts the warm-up */
//#define DVFS_FIX
245
/* number of per-call samples kept in the history buffers below */
#define SUM_OVER 50000 // longer time = more power consumed
246
/* circular history of diff_dispatch samples, written at hist_idx */
static double hist_dispatch[SUM_OVER];
247
/* circular history of diff_commit samples, written at hist_idx */
static double hist_commit[SUM_OVER];
248
/* next slot to write in the history buffers; wraps to 0 at SUM_OVER */
static int hist_idx = 0;
249
/* calls spent at speed grade 0 */
static double slow_cycles = 0;
250
/* calls spent at speed grade 1 (includes the warm-up calls) */
static double fast_cycles = 0;
251
/* value of cycle_count when the speed grade last changed */
static double last_switch_time = 0;
252
/* number of update_power_stats() calls so far */
static double cycle_count = 0;
253
/* length of the speed_delay line below */
#define SWITCH_CYCLES 30
254
/* pending speed-grade decisions: the newest decision is written to slot 0 and
   the last slot is read as the speed grade to apply */
static int speed_delay[SWITCH_CYCLES];
255
/* added to the on-chip parasitic totals on each call made at speed grade 0 */
#define ONCHIP_VREG_LOSS_LOW 0.220
256
/* added to the on-chip parasitic totals on each call made at any other speed grade */
#define ONCHIP_VREG_LOSS_HIGH 0.120
257

    
258
static double max_cycle_power_cc1 = 0.0;
259
static double max_cycle_power_cc2 = 0.0;
260
static double max_cycle_power_cc3 = 0.0;
261

    
262
extern counter_t rename_access;
263
extern counter_t bpred_access;
264
extern counter_t window_access;
265
extern counter_t lsq_access;
266
extern counter_t regfile_access;
267
extern counter_t icache_access;
268
extern counter_t dcache_access;
269
extern counter_t dcache2_access;
270
extern counter_t alu_access;
271
extern counter_t ialu_access;
272
extern counter_t falu_access;
273
extern counter_t resultbus_access;
274

    
275
extern counter_t window_selection_access;
276
extern counter_t window_wakeup_access;
277
extern counter_t window_preg_access;
278
extern counter_t lsq_preg_access;
279
extern counter_t lsq_wakeup_access;
280
extern counter_t lsq_store_data_access;
281
extern counter_t lsq_load_data_access;
282

    
283
extern counter_t window_total_pop_count_cycle;
284
extern counter_t window_num_pop_count_cycle;
285
extern counter_t lsq_total_pop_count_cycle;
286
extern counter_t lsq_num_pop_count_cycle;
287
extern counter_t regfile_total_pop_count_cycle;
288
extern counter_t regfile_num_pop_count_cycle;
289
extern counter_t resultbus_total_pop_count_cycle;
290
extern counter_t resultbus_num_pop_count_cycle;
291

    
292
static counter_t total_rename_access=0;
293
static counter_t total_bpred_access=0;
294
static counter_t total_window_access=0;
295
static counter_t total_lsq_access=0;
296
static counter_t total_regfile_access=0;
297
static counter_t total_icache_access=0;
298
static counter_t total_dcache_access=0;
299
static counter_t total_dcache2_access=0;
300
static counter_t total_alu_access=0;
301
static counter_t total_resultbus_access=0;
302

    
303
static counter_t max_rename_access;
304
static counter_t max_bpred_access;
305
static counter_t max_window_access;
306
static counter_t max_lsq_access;
307
static counter_t max_regfile_access;
308
static counter_t max_icache_access;
309
static counter_t max_dcache_access;
310
static counter_t max_dcache2_access;
311
static counter_t max_alu_access;
312
static counter_t max_resultbus_access;
313

    
314
void clear_access_stats()
315
{
316
  rename_access=0;
317
  bpred_access=0;
318
  window_access=0;
319
  lsq_access=0;
320
  regfile_access=0;
321
  icache_access=0;
322
  dcache_access=0;
323
  dcache2_access=0;
324
  alu_access=0;
325
  ialu_access=0;
326
  falu_access=0;
327
  resultbus_access=0;
328

    
329
  window_preg_access=0;
330
  window_selection_access=0;
331
  window_wakeup_access=0;
332
  lsq_store_data_access=0;
333
  lsq_load_data_access=0;
334
  lsq_wakeup_access=0;
335
  lsq_preg_access=0;
336

    
337
  window_total_pop_count_cycle=0;
338
  window_num_pop_count_cycle=0;
339
  lsq_total_pop_count_cycle=0;
340
  lsq_num_pop_count_cycle=0;
341
  regfile_total_pop_count_cycle=0;
342
  regfile_num_pop_count_cycle=0;
343
  resultbus_total_pop_count_cycle=0;
344
  resultbus_num_pop_count_cycle=0;
345
}
346

    
347
/* compute bitline activity factors which we use to scale bitline power 
348
   Here it is very important whether we assume 0's or 1's are
349
   responsible for dissipating power in pre-charged structures. (since
350
   most of the bits are 0's, we assume the design is power-efficient
351
   enough to allow 0's to _not_ discharge)
352
*/
353
/*
 * Return the complemented bitline activity factor for one cycle.
 *
 *   num_pop_count_cycle    number of pop-count samples taken
 *   total_pop_count_cycle  sum of the set-bit counts of those samples
 *   pop_width              width in bits of each sampled value
 *
 * The mean number of set bits per sample (0 when no samples were taken) is
 * divided by pop_width to give the fraction of 1 bits; the function returns
 * one minus that fraction.
 */
double compute_af(counter_t num_pop_count_cycle,counter_t total_pop_count_cycle,int pop_width) {
  double mean_ones = 0;

  if (num_pop_count_cycle)
    mean_ones = (double)total_pop_count_cycle / (double)num_pop_count_cycle;

  return 1.0 - mean_ones / (double)pop_width;
}
370

    
371
/* compute power statistics on each cycle, for each conditional clocking style.  Obviously
372
most of the speed penalty comes here, so if you don't want per-cycle power estimates
373
you could post-process 
374

375
See README.wattch for details on the various clock gating styles.
376

377
*/
378
void update_power_stats()
379
{
380
  double window_af_b, lsq_af_b, regfile_af_b, resultbus_af_b;
381
  double current;
382
  int speed_idx;
383

    
384
#ifdef DYNAMIC_AF
385
  window_af_b = compute_af(window_num_pop_count_cycle,window_total_pop_count_cycle,data_width);
386
  lsq_af_b = compute_af(lsq_num_pop_count_cycle,lsq_total_pop_count_cycle,data_width);
387
  regfile_af_b = compute_af(regfile_num_pop_count_cycle,regfile_total_pop_count_cycle,data_width);
388
  resultbus_af_b = compute_af(resultbus_num_pop_count_cycle,resultbus_total_pop_count_cycle,data_width);
389
#endif
390
  
391
  rename_power+=power.rename_power;
392
  bpred_power+=power.bpred_power;
393
  window_power+=power.window_power;
394
  lsq_power+=power.lsq_power;
395
  regfile_power+=power.regfile_power;
396
  icache_power+=power.icache_power+power.itlb;
397
  dcache_power+=power.dcache_power+power.dtlb;
398
  dcache2_power+=power.dcache2_power;
399
  alu_power+=power.ialu_power + power.falu_power;
400
  falu_power+=power.falu_power;
401
  resultbus_power+=power.resultbus;
402
  clock_power+=power.clock_power;
403

    
404
  total_rename_access+=rename_access;
405
  total_bpred_access+=bpred_access;
406
  total_window_access+=window_access;
407
  total_lsq_access+=lsq_access;
408
  total_regfile_access+=regfile_access;
409
  total_icache_access+=icache_access;
410
  total_dcache_access+=dcache_access;
411
  total_dcache2_access+=dcache2_access;
412
  total_alu_access+=alu_access;
413
  total_resultbus_access+=resultbus_access;
414

    
415
  max_rename_access=MAX(rename_access,max_rename_access);
416
  max_bpred_access=MAX(bpred_access,max_bpred_access);
417
  max_window_access=MAX(window_access,max_window_access);
418
  max_lsq_access=MAX(lsq_access,max_lsq_access);
419
  max_regfile_access=MAX(regfile_access,max_regfile_access);
420
  max_icache_access=MAX(icache_access,max_icache_access);
421
  max_dcache_access=MAX(dcache_access,max_dcache_access);
422
  max_dcache2_access=MAX(dcache2_access,max_dcache2_access);
423
  max_alu_access=MAX(alu_access,max_alu_access);
424
  max_resultbus_access=MAX(resultbus_access,max_resultbus_access);
425
      
426
  if(rename_access) {
427
    rename_power_cc1+=power.rename_power;
428
    rename_power_cc2+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
429
    rename_power_cc3+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
430
  }
431
  else 
432
    rename_power_cc3+=turnoff_factor*power.rename_power;
433

    
434
  if(bpred_access) {
435
    if(bpred_access <= 2)
436
      bpred_power_cc1+=power.bpred_power;
437
    else
438
      bpred_power_cc1+=((double)bpred_access/2.0) * power.bpred_power;
439
    bpred_power_cc2+=((double)bpred_access/2.0) * power.bpred_power;
440
    bpred_power_cc3+=((double)bpred_access/2.0) * power.bpred_power;
441
  }
442
  else
443
    bpred_power_cc3+=turnoff_factor*power.bpred_power;
444

    
445
#ifdef STATIC_AF
446
  if(window_preg_access) {
447
    if(window_preg_access <= 3*ruu_issue_width)
448
      window_power_cc1+=power.rs_power;
449
    else
450
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
451
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
452
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
453
  }
454
  else
455
    window_power_cc3+=turnoff_factor*power.rs_power;
456
#elif defined(DYNAMIC_AF)
457
  if(window_preg_access) {
458
    if(window_preg_access <= 3*ruu_issue_width)
459
      window_power_cc1+=power.rs_power_nobit + window_af_b*power.rs_bitline;
460
    else
461
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
462
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
463
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
464
  }
465
  else
466
    window_power_cc3+=turnoff_factor*power.rs_power;
467
#else
468
  panic("no AF-style defined\n");
469
#endif
470

    
471
  if(window_selection_access) {
472
    if(window_selection_access <= ruu_issue_width)
473
      window_power_cc1+=power.selection;
474
    else
475
      window_power_cc1+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
476
    window_power_cc2+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
477
    window_power_cc3+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
478
  }
479
  else
480
    window_power_cc3+=turnoff_factor*power.selection;
481

    
482
  if(window_wakeup_access) {
483
    if(window_wakeup_access <= ruu_issue_width)
484
      window_power_cc1+=power.wakeup_power;
485
    else
486
      window_power_cc1+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
487
    window_power_cc2+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
488
    window_power_cc3+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
489
  }
490
  else
491
    window_power_cc3+=turnoff_factor*power.wakeup_power;
492

    
493
  if(lsq_wakeup_access) {
494
    if(lsq_wakeup_access <= res_memport)
495
      lsq_power_cc1+=power.lsq_wakeup_power;
496
    else
497
      lsq_power_cc1+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
498
    lsq_power_cc2+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
499
    lsq_power_cc3+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
500
  }
501
  else
502
    lsq_power_cc3+=turnoff_factor*power.lsq_wakeup_power;
503

    
504
#ifdef STATIC_AF
505
  if(lsq_preg_access) {
506
    if(lsq_preg_access <= res_memport)
507
      lsq_power_cc1+=power.lsq_rs_power;
508
    else
509
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
510
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
511
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
512
  }
513
  else
514
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
515
#else
516
  if(lsq_preg_access) {
517
    if(lsq_preg_access <= res_memport)
518
      lsq_power_cc1+=power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline;
519
    else
520
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
521
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
522
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
523
  }
524
  else
525
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
526
#endif
527

    
528
#ifdef STATIC_AF
529
  if(regfile_access) {
530
    if(regfile_access <= (3.0*ruu_commit_width))
531
      regfile_power_cc1+=power.regfile_power;
532
    else
533
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
534
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
535
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
536
  }
537
  else
538
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
539
#else
540
  if(regfile_access) {
541
    if(regfile_access <= (3.0*ruu_commit_width))
542
      regfile_power_cc1+=power.regfile_power_nobit + regfile_af_b*power.regfile_bitline;
543
    else
544
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
545
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
546
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
547
  }
548
  else
549
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
550
#endif
551

    
552
  if(icache_access) {
553
    /* don't scale icache because we assume 1 line is fetched, unless fetch stalls */
554
    icache_power_cc1+=power.icache_power+power.itlb;
555
    icache_power_cc2+=power.icache_power+power.itlb;
556
    icache_power_cc3+=power.icache_power+power.itlb;
557
  }
558
  else
559
    icache_power_cc3+=turnoff_factor*(power.icache_power+power.itlb);
560

    
561
  if(dcache_access) {
562
    if(dcache_access <= res_memport)
563
      dcache_power_cc1+=power.dcache_power+power.dtlb;
564
    else
565
      dcache_power_cc1+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
566
                                                     power.dtlb);
567
    dcache_power_cc2+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
568
                                                   power.dtlb);
569
    dcache_power_cc3+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
570
                                                   power.dtlb);
571
  }
572
  else
573
    dcache_power_cc3+=turnoff_factor*(power.dcache_power+power.dtlb);
574

    
575
  if(dcache2_access) {
576
    if(dcache2_access <= res_memport)
577
      dcache2_power_cc1+=power.dcache2_power;
578
    else
579
      dcache2_power_cc1+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
580
    dcache2_power_cc2+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
581
    dcache2_power_cc3+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
582
  }
583
  else
584
    dcache2_power_cc3+=turnoff_factor*power.dcache2_power;
585

    
586
  if(alu_access) {
587
    if(ialu_access)
588
      alu_power_cc1+=power.ialu_power;
589
    else
590
      alu_power_cc3+=turnoff_factor*power.ialu_power;
591
    if(falu_access)
592
      alu_power_cc1+=power.falu_power;
593
    else
594
      alu_power_cc3+=turnoff_factor*power.falu_power;
595

    
596
    alu_power_cc2+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
597
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
598
    alu_power_cc3+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
599
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
600
  }
601
  else
602
    alu_power_cc3+=turnoff_factor*(power.ialu_power + power.falu_power);
603

    
604
#ifdef STATIC_AF
605
  if(resultbus_access) {
606
    assert(ruu_issue_width != 0);
607
    if(resultbus_access <= ruu_issue_width) {
608
      resultbus_power_cc1+=power.resultbus;
609
    }
610
    else {
611
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
612
    }
613
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
614
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
615
  }
616
  else
617
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
618
#else
619
  if(resultbus_access) {
620
    assert(ruu_issue_width != 0);
621
    if(resultbus_access <= ruu_issue_width) {
622
      resultbus_power_cc1+=resultbus_af_b*power.resultbus;
623
    }
624
    else {
625
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
626
    }
627
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
628
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
629
  }
630
  else
631
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
632
#endif
633

    
634
  total_cycle_power = rename_power + bpred_power + window_power + 
635
    lsq_power + regfile_power + icache_power + dcache_power +
636
    alu_power + resultbus_power;
637

    
638
  total_cycle_power_cc1 = rename_power_cc1 + bpred_power_cc1 + 
639
    window_power_cc1 + lsq_power_cc1 + regfile_power_cc1 + 
640
    icache_power_cc1 + dcache_power_cc1 + alu_power_cc1 + 
641
    resultbus_power_cc1;
642

    
643
  total_cycle_power_cc2 = rename_power_cc2 + bpred_power_cc2 + 
644
    window_power_cc2 + lsq_power_cc2 + regfile_power_cc2 + 
645
    icache_power_cc2 + dcache_power_cc2 + alu_power_cc2 + 
646
    resultbus_power_cc2;
647

    
648
  total_cycle_power_cc3 = rename_power_cc3 + bpred_power_cc3 + 
649
    window_power_cc3 + lsq_power_cc3 + regfile_power_cc3 + 
650
    icache_power_cc3 + dcache_power_cc3 + alu_power_cc3 + 
651
    resultbus_power_cc3;
652

    
653
  clock_power_cc1+=power.clock_power*(total_cycle_power_cc1/total_cycle_power);
654
  clock_power_cc2+=power.clock_power*(total_cycle_power_cc2/total_cycle_power);
655
  clock_power_cc3+=power.clock_power*(total_cycle_power_cc3/total_cycle_power);
656

    
657
  total_cycle_power_cc1 += clock_power_cc1;
658
  total_cycle_power_cc2 += clock_power_cc2;
659
  total_cycle_power_cc3 += clock_power_cc3;
660

    
661
  current_total_cycle_power_cc1 = total_cycle_power_cc1
662
    -last_single_total_cycle_power_cc1;
663
  current_total_cycle_power_cc2 = total_cycle_power_cc2
664
    -last_single_total_cycle_power_cc2;
665
  current_total_cycle_power_cc3 = total_cycle_power_cc3
666
    -last_single_total_cycle_power_cc3;
667

    
668
   current = current_total_cycle_power_cc3 / Vdd;
669

    
670
  if (max_amp < current ) {
671
      max_amp = current ;
672
  }
673

    
674
  if (min_amp > current) {
675
      min_amp = current;
676
  }
677

    
678
  if (current < 0.5) {
679
      offchip_parasitic_cc1 += offchip_ploss[0];
680
      offchip_parasitic_cc2 += offchip_ploss[0];
681
      offchip_parasitic_cc3 += offchip_ploss[0];
682
  } else if (current < 1) {
683
      offchip_parasitic_cc1 += offchip_ploss[1];
684
      offchip_parasitic_cc2 += offchip_ploss[1];
685
      offchip_parasitic_cc3 += offchip_ploss[1];
686
  } else if (current < 1.5) {
687
      offchip_parasitic_cc1 += offchip_ploss[2];
688
      offchip_parasitic_cc2 += offchip_ploss[2];
689
      offchip_parasitic_cc3 += offchip_ploss[2];
690
  } else if (current < 2) {
691
      offchip_parasitic_cc1 += offchip_ploss[3];
692
      offchip_parasitic_cc2 += offchip_ploss[3];
693
      offchip_parasitic_cc3 += offchip_ploss[3];
694
  } else if (current < 2.5) {
695
      offchip_parasitic_cc1 += offchip_ploss[4];
696
      offchip_parasitic_cc2 += offchip_ploss[4];
697
      offchip_parasitic_cc3 += offchip_ploss[4];
698
  } else if (current < 3) {
699
      offchip_parasitic_cc1 += offchip_ploss[5];
700
      offchip_parasitic_cc2 += offchip_ploss[5];
701
      offchip_parasitic_cc3 += offchip_ploss[5];
702
  } else if (current < 3.5) {
703
      offchip_parasitic_cc1 += offchip_ploss[6];
704
      offchip_parasitic_cc2 += offchip_ploss[6];
705
      offchip_parasitic_cc3 += offchip_ploss[6];
706
  } else if (current < 4) {
707
      offchip_parasitic_cc1 += offchip_ploss[7];
708
      offchip_parasitic_cc2 += offchip_ploss[7];
709
      offchip_parasitic_cc3 += offchip_ploss[7];
710
  } else if (current < 4.5) {
711
      offchip_parasitic_cc1 += offchip_ploss[8];
712
      offchip_parasitic_cc2 += offchip_ploss[8];
713
      offchip_parasitic_cc3 += offchip_ploss[8];
714
  } else if (current < 5) {
715
      offchip_parasitic_cc1 += offchip_ploss[9];
716
      offchip_parasitic_cc2 += offchip_ploss[9];
717
      offchip_parasitic_cc3 += offchip_ploss[9];
718
  } else if (current < 5.5) {
719
      offchip_parasitic_cc1 += offchip_ploss[10];
720
      offchip_parasitic_cc2 += offchip_ploss[10];
721
      offchip_parasitic_cc3 += offchip_ploss[10];
722
  } else if (current < 6) {
723
      offchip_parasitic_cc1 += offchip_ploss[11];
724
      offchip_parasitic_cc2 += offchip_ploss[11];
725
      offchip_parasitic_cc3 += offchip_ploss[11];
726
  } else if (current < 6.5) {
727
      offchip_parasitic_cc1 += offchip_ploss[12];
728
      offchip_parasitic_cc2 += offchip_ploss[12];
729
      offchip_parasitic_cc3 += offchip_ploss[12];
730
  } else if (current < 7) {
731
      offchip_parasitic_cc1 += offchip_ploss[13];
732
      offchip_parasitic_cc2 += offchip_ploss[13];
733
      offchip_parasitic_cc3 += offchip_ploss[13];
734
  } else if (current < 7.5) {
735
      offchip_parasitic_cc1 += offchip_ploss[14];
736
      offchip_parasitic_cc2 += offchip_ploss[14];
737
      offchip_parasitic_cc3 += offchip_ploss[14];
738
  } else if (current < 8) {
739
      offchip_parasitic_cc1 += offchip_ploss[15];
740
      offchip_parasitic_cc2 += offchip_ploss[15];
741
      offchip_parasitic_cc3 += offchip_ploss[15];
742
  } else if (current < 8.5) {
743
      offchip_parasitic_cc1 += offchip_ploss[16];
744
      offchip_parasitic_cc2 += offchip_ploss[16];
745
      offchip_parasitic_cc3 += offchip_ploss[16];
746
  } else if (current < 9) {
747
      offchip_parasitic_cc1 += offchip_ploss[17];
748
      offchip_parasitic_cc2 += offchip_ploss[17];
749
      offchip_parasitic_cc3 += offchip_ploss[17];
750
  } else if (current < 9.5) {
751
      offchip_parasitic_cc1 += offchip_ploss[18];
752
      offchip_parasitic_cc2 += offchip_ploss[18];
753
      offchip_parasitic_cc3 += offchip_ploss[18];
754
  } else if (current < 10) {
755
      offchip_parasitic_cc1 += offchip_ploss[19];
756
      offchip_parasitic_cc2 += offchip_ploss[19];
757
      offchip_parasitic_cc3 += offchip_ploss[19];
758
  } else if (current < 10.5) {
759
      offchip_parasitic_cc1 += offchip_ploss[20];
760
      offchip_parasitic_cc2 += offchip_ploss[20];
761
      offchip_parasitic_cc3 += offchip_ploss[20];
762
  } else if (current < 11) {
763
      offchip_parasitic_cc1 += offchip_ploss[21];
764
      offchip_parasitic_cc2 += offchip_ploss[21];
765
      offchip_parasitic_cc3 += offchip_ploss[21];
766
  } else if (current < 11.5) {
767
      offchip_parasitic_cc1 += offchip_ploss[22];
768
      offchip_parasitic_cc2 += offchip_ploss[22];
769
      offchip_parasitic_cc3 += offchip_ploss[22];
770
  } else if (current < 12) {
771
      offchip_parasitic_cc1 += offchip_ploss[23];
772
      offchip_parasitic_cc2 += offchip_ploss[23];
773
      offchip_parasitic_cc3 += offchip_ploss[23];
774
  } else if (current < 12.5) {
775
      offchip_parasitic_cc1 += offchip_ploss[24];
776
      offchip_parasitic_cc2 += offchip_ploss[24];
777
      offchip_parasitic_cc3 += offchip_ploss[24];
778
  } else if (current < 13) {
779
      offchip_parasitic_cc1 += offchip_ploss[25];
780
      offchip_parasitic_cc2 += offchip_ploss[25];
781
      offchip_parasitic_cc3 += offchip_ploss[25];
782
  } else {
783
      offchip_parasitic_cc1 += offchip_ploss[26];
784
      offchip_parasitic_cc2 += offchip_ploss[26];
785
      offchip_parasitic_cc3 += offchip_ploss[26];
786
  }
787

    
788
  offchip_parasitic_cc1 += pow(current, 2) * PARASITIC_OHM;
789
  offchip_parasitic_cc2 += pow(current, 2) * PARASITIC_OHM;
790
  offchip_parasitic_cc3 += pow(current, 2) * PARASITIC_OHM;
791

    
792
  // Onchip regulator paraisitc loss
793
  if (speed_grade == 0) {
794
      onchip_parasitic_cc1 += ONCHIP_VREG_LOSS_LOW;
795
      onchip_parasitic_cc2 += ONCHIP_VREG_LOSS_LOW;
796
      onchip_parasitic_cc3 += ONCHIP_VREG_LOSS_LOW;
797
  } else {
798
      onchip_parasitic_cc1 += ONCHIP_VREG_LOSS_HIGH;
799
      onchip_parasitic_cc2 += ONCHIP_VREG_LOSS_HIGH;
800
      onchip_parasitic_cc3 += ONCHIP_VREG_LOSS_HIGH;
801
  }
802
    
803
  max_cycle_power_cc1 = MAX(max_cycle_power_cc1,current_total_cycle_power_cc1);
804
  max_cycle_power_cc2 = MAX(max_cycle_power_cc2,current_total_cycle_power_cc2);
805
  max_cycle_power_cc3 = MAX(max_cycle_power_cc3,current_total_cycle_power_cc3);
806

    
807
  last_single_total_cycle_power_cc1 = total_cycle_power_cc1;
808
  last_single_total_cycle_power_cc2 = total_cycle_power_cc2;
809
  last_single_total_cycle_power_cc3 = total_cycle_power_cc3;
810

    
811
  cycle_count++;
812

    
813
  // here's where we change VFI levels
814
  diff_dispatch = sim_total_insn - last_sim_total_insn;
815
  diff_commit = sim_num_insn - last_sim_num_insn;
816
  
817
  diff_dispatch_sum += diff_dispatch;
818
  diff_commit_sum += diff_commit;
819

    
820
  hist_dispatch[hist_idx] = diff_dispatch;
821
  hist_commit[hist_idx] = diff_commit;
822
  hist_idx++;
823
  if(hist_idx >= SUM_OVER) {
824
    hist_idx = 0;
825
  }
826

    
827
  if(init_count >= SUM_OVER) {
828
      // Update speed
829
    speed_grade = speed_delay[SWITCH_CYCLES - 1];
830
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES-1; speed_idx++) {
831

    
832
        speed_delay[speed_idx+1] = speed_delay[speed_idx];
833
    }
834

    
835
    diff_dispatch_sum -= hist_dispatch[hist_idx];
836
    diff_commit_sum -= hist_commit[hist_idx];
837

    
838
    if( diff_commit_sum < diff_dispatch_sum ) {
839
        speed_delay[0] = 0;
840
    }
841
    else if( diff_commit_sum >= diff_dispatch_sum ) {
842
        speed_delay[0] = 1;
843
    }
844

    
845
    if(speed_grade == 0) {
846
        slow_cycles++;
847
    }
848
    else {
849
        fast_cycles++;
850
    }
851

    
852
  } else {
853
    init_count++;
854
    fast_cycles++;
855

    
856
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES; speed_idx++) {
857
        speed_delay[speed_idx] = 1;
858
    }
859
  }
860

    
861
//  if (diff_commit <= diff_dispatch) {
862
//      speed_grade = 0;
863
//  } else if (diff_commit > diff_dispatch) {
864
//      speed_grade = 1;
865
//  }
866

    
867
  if ((speed_grade == 0) && (last_speed_grade == 1)) {
868
      Mhz = Mhz / 2;
869
      Vdd = Vdd / 2;
870
      printf("Speed down!\n");
871
      last_switch_time = cycle_count;
872
  } else if ((speed_grade == 1) && (last_speed_grade == 0)) {
873
      Mhz = Mhz * 2;
874
      Vdd = Vdd * 2;
875
      printf("Speed up!\n");
876
      last_switch_time = cycle_count;
877
  }
878
#ifdef DVFS_FIX
879
  else if (last_switch_time < cycle_count-(SUM_OVER/3) && speed_grade==0 ) {
880
      speed_grade = 1;
881
      Mhz = Mhz * 2;
882
      Vdd = Vdd * 2;
883
      init_count = 0;
884
      last_switch_time = cycle_count;
885
      hist_idx = 0;
886
      diff_commit_sum = 0;
887
      diff_dispatch_sum = 0;
888
  }
889
#endif
890
      //printf("Vdd = %f, MHz = %f\n",Vdd,Mhz);
891

    
892
  if (speed_grade != last_speed_grade) {
893
    Period = 1/Mhz;
894
    SensePowerfactor3 = Mhz * Vbitsense * Vbitsense;
895
    SensePowerfactor2 = Mhz * (Vbitpre - Vbitsense) * (Vbitpre - Vbitsense);
896
    SensePowerfactor = (Mhz) * (Vdd/2) * (Vdd/2);
897
    Powerfactor = (Mhz) * (Vdd) * (Vdd);
898
    Sense2Powerfactor = Mhz * (2 * .3 + .1 * Vdd);
899
    LowSwingPowerfactor = Mhz * .2 * .2;
900
      calculate_power(&power);
901
  }
902

    
903
  last_speed_grade = speed_grade;
904

    
905
  // Update
906
  last_sim_num_insn  = sim_num_insn;
907
  last_sim_total_insn = sim_total_insn;
908

    
909
}
910

    
911
void
912
power_reg_stats(struct stat_sdb_t *sdb)        /* stats database */
913
{
914
  stat_reg_double(sdb, "rename_power", "total power usage of rename unit", &rename_power, 0, NULL);
915

    
916
  stat_reg_double(sdb, "bpred_power", "total power usage of bpred unit", &bpred_power, 0, NULL);
917

    
918
  stat_reg_double(sdb, "window_power", "total power usage of instruction window", &window_power, 0, NULL);
919

    
920
  stat_reg_double(sdb, "lsq_power", "total power usage of load/store queue", &lsq_power, 0, NULL);
921

    
922
  stat_reg_double(sdb, "regfile_power", "total power usage of arch. regfile", &regfile_power, 0, NULL);
923

    
924
  stat_reg_double(sdb, "icache_power", "total power usage of icache", &icache_power, 0, NULL);
925

    
926
  stat_reg_double(sdb, "dcache_power", "total power usage of dcache", &dcache_power, 0, NULL);
927

    
928
  stat_reg_double(sdb, "dcache2_power", "total power usage of dcache2", &dcache2_power, 0, NULL);
929

    
930
  stat_reg_double(sdb, "alu_power", "total power usage of alu", &alu_power, 0, NULL);
931

    
932
  stat_reg_double(sdb, "falu_power", "total power usage of falu", &falu_power, 0, NULL);
933

    
934
  stat_reg_double(sdb, "resultbus_power", "total power usage of resultbus", &resultbus_power, 0, NULL);
935

    
936
  stat_reg_double(sdb, "clock_power", "total power usage of clock", &clock_power, 0, NULL);
937

    
938
  stat_reg_formula(sdb, "avg_rename_power", "avg power usage of rename unit", "rename_power/sim_cycle", NULL);
939

    
940
  stat_reg_formula(sdb, "avg_bpred_power", "avg power usage of bpred unit", "bpred_power/sim_cycle", NULL);
941

    
942
  stat_reg_formula(sdb, "avg_window_power", "avg power usage of instruction window", "window_power/sim_cycle",  NULL);
943

    
944
  stat_reg_formula(sdb, "avg_lsq_power", "avg power usage of lsq", "lsq_power/sim_cycle",  NULL);
945

    
946
  stat_reg_formula(sdb, "avg_regfile_power", "avg power usage of arch. regfile", "regfile_power/sim_cycle",  NULL);
947

    
948
  stat_reg_formula(sdb, "avg_icache_power", "avg power usage of icache", "icache_power/sim_cycle",  NULL);
949

    
950
  stat_reg_formula(sdb, "avg_dcache_power", "avg power usage of dcache", "dcache_power/sim_cycle",  NULL);
951

    
952
  stat_reg_formula(sdb, "avg_dcache2_power", "avg power usage of dcache2", "dcache2_power/sim_cycle",  NULL);
953

    
954
  stat_reg_formula(sdb, "avg_alu_power", "avg power usage of alu", "alu_power/sim_cycle",  NULL);
955

    
956
  stat_reg_formula(sdb, "avg_falu_power", "avg power usage of falu", "falu_power/sim_cycle",  NULL);
957

    
958
  stat_reg_formula(sdb, "avg_resultbus_power", "avg power usage of resultbus", "resultbus_power/sim_cycle",  NULL);
959

    
960
  stat_reg_formula(sdb, "avg_clock_power", "avg power usage of clock", "clock_power/sim_cycle",  NULL);
961

    
962
  stat_reg_formula(sdb, "fetch_stage_power", "total power usage of fetch stage", "icache_power + bpred_power", NULL);
963

    
964
  stat_reg_formula(sdb, "dispatch_stage_power", "total power usage of dispatch stage", "rename_power", NULL);
965

    
966
  stat_reg_formula(sdb, "issue_stage_power", "total power usage of issue stage", "resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power", NULL);
967

    
968
  stat_reg_formula(sdb, "avg_fetch_power", "average power of fetch unit per cycle", "(icache_power + bpred_power)/ sim_cycle", /* format */NULL);
969

    
970
  stat_reg_formula(sdb, "avg_dispatch_power", "average power of dispatch unit per cycle", "(rename_power)/ sim_cycle", /* format */NULL);
971

    
972
  stat_reg_formula(sdb, "avg_issue_power", "average power of issue unit per cycle", "(resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power)/ sim_cycle", /* format */NULL);
973

    
974
  stat_reg_formula(sdb, "total_power", "total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power  + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)", NULL);
975

    
976
  stat_reg_formula(sdb, "avg_total_power_cycle", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_cycle", NULL);
977

    
978
  stat_reg_formula(sdb, "avg_total_power_cycle_nofp_nod2", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_cycle", NULL);
979

    
980
  stat_reg_formula(sdb, "avg_total_power_insn", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_total_insn", NULL);
981

    
982
  stat_reg_formula(sdb, "avg_total_power_insn_nofp_nod2", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_total_insn", NULL);
983

    
984
  stat_reg_double(sdb, "rename_power_cc1", "total power usage of rename unit_cc1", &rename_power_cc1, 0, NULL);
985

    
986
  stat_reg_double(sdb, "bpred_power_cc1", "total power usage of bpred unit_cc1", &bpred_power_cc1, 0, NULL);
987

    
988
  stat_reg_double(sdb, "window_power_cc1", "total power usage of instruction window_cc1", &window_power_cc1, 0, NULL);
989

    
990
  stat_reg_double(sdb, "lsq_power_cc1", "total power usage of lsq_cc1", &lsq_power_cc1, 0, NULL);
991

    
992
  stat_reg_double(sdb, "regfile_power_cc1", "total power usage of arch. regfile_cc1", &regfile_power_cc1, 0, NULL);
993

    
994
  stat_reg_double(sdb, "icache_power_cc1", "total power usage of icache_cc1", &icache_power_cc1, 0, NULL);
995

    
996
  stat_reg_double(sdb, "dcache_power_cc1", "total power usage of dcache_cc1", &dcache_power_cc1, 0, NULL);
997

    
998
  stat_reg_double(sdb, "dcache2_power_cc1", "total power usage of dcache2_cc1", &dcache2_power_cc1, 0, NULL);
999

    
1000
  stat_reg_double(sdb, "alu_power_cc1", "total power usage of alu_cc1", &alu_power_cc1, 0, NULL);
1001

    
1002
  stat_reg_double(sdb, "resultbus_power_cc1", "total power usage of resultbus_cc1", &resultbus_power_cc1, 0, NULL);
1003

    
1004
  stat_reg_double(sdb, "clock_power_cc1", "total power usage of clock_cc1", &clock_power_cc1, 0, NULL);
1005

    
1006
  stat_reg_formula(sdb, "avg_rename_power_cc1", "avg power usage of rename unit_cc1", "rename_power_cc1/sim_cycle", NULL);
1007

    
1008
  stat_reg_formula(sdb, "avg_bpred_power_cc1", "avg power usage of bpred unit_cc1", "bpred_power_cc1/sim_cycle", NULL);
1009

    
1010
  stat_reg_formula(sdb, "avg_window_power_cc1", "avg power usage of instruction window_cc1", "window_power_cc1/sim_cycle",  NULL);
1011

    
1012
  stat_reg_formula(sdb, "avg_lsq_power_cc1", "avg power usage of lsq_cc1", "lsq_power_cc1/sim_cycle",  NULL);
1013

    
1014
  stat_reg_formula(sdb, "avg_regfile_power_cc1", "avg power usage of arch. regfile_cc1", "regfile_power_cc1/sim_cycle",  NULL);
1015

    
1016
  stat_reg_formula(sdb, "avg_icache_power_cc1", "avg power usage of icache_cc1", "icache_power_cc1/sim_cycle",  NULL);
1017

    
1018
  stat_reg_formula(sdb, "avg_dcache_power_cc1", "avg power usage of dcache_cc1", "dcache_power_cc1/sim_cycle",  NULL);
1019

    
1020
  stat_reg_formula(sdb, "avg_dcache2_power_cc1", "avg power usage of dcache2_cc1", "dcache2_power_cc1/sim_cycle",  NULL);
1021

    
1022
  stat_reg_formula(sdb, "avg_alu_power_cc1", "avg power usage of alu_cc1", "alu_power_cc1/sim_cycle",  NULL);
1023

    
1024
  stat_reg_formula(sdb, "avg_resultbus_power_cc1", "avg power usage of resultbus_cc1", "resultbus_power_cc1/sim_cycle",  NULL);
1025

    
1026
  stat_reg_formula(sdb, "avg_clock_power_cc1", "avg power usage of clock_cc1", "clock_power_cc1/sim_cycle",  NULL);
1027

    
1028
  stat_reg_formula(sdb, "fetch_stage_power_cc1", "total power usage of fetch stage_cc1", "icache_power_cc1 + bpred_power_cc1", NULL);
1029

    
1030
  stat_reg_formula(sdb, "dispatch_stage_power_cc1", "total power usage of dispatch stage_cc1", "rename_power_cc1", NULL);
1031

    
1032
  stat_reg_formula(sdb, "issue_stage_power_cc1", "total power usage of issue stage_cc1", "resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1", NULL);
1033

    
1034
  stat_reg_formula(sdb, "avg_fetch_power_cc1", "average power of fetch unit per cycle_cc1", "(icache_power_cc1 + bpred_power_cc1)/ sim_cycle", /* format */NULL);
1035

    
1036
  stat_reg_formula(sdb, "avg_dispatch_power_cc1", "average power of dispatch unit per cycle_cc1", "(rename_power_cc1)/ sim_cycle", /* format */NULL);
1037

    
1038
  stat_reg_formula(sdb, "avg_issue_power_cc1", "average power of issue unit per cycle_cc1", "(resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1)/ sim_cycle", /* format */NULL);
1039

    
1040
  stat_reg_formula(sdb, "total_power_cycle_cc1", "total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)", NULL);
1041

    
1042
  stat_reg_formula(sdb, "avg_total_power_cycle_cc1", "average total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 +dcache2_power_cc1)/sim_cycle", NULL);
1043

    
1044
  stat_reg_formula(sdb, "avg_total_power_insn_cc1", "average total power per insn_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 +  alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)/sim_total_insn", NULL);
1045

    
1046
  stat_reg_double(sdb, "rename_power_cc2", "total power usage of rename unit_cc2", &rename_power_cc2, 0, NULL);
1047

    
1048
  stat_reg_double(sdb, "bpred_power_cc2", "total power usage of bpred unit_cc2", &bpred_power_cc2, 0, NULL);
1049

    
1050
  stat_reg_double(sdb, "window_power_cc2", "total power usage of instruction window_cc2", &window_power_cc2, 0, NULL);
1051

    
1052
  stat_reg_double(sdb, "lsq_power_cc2", "total power usage of lsq_cc2", &lsq_power_cc2, 0, NULL);
1053

    
1054
  stat_reg_double(sdb, "regfile_power_cc2", "total power usage of arch. regfile_cc2", &regfile_power_cc2, 0, NULL);
1055

    
1056
  stat_reg_double(sdb, "icache_power_cc2", "total power usage of icache_cc2", &icache_power_cc2, 0, NULL);
1057

    
1058
  stat_reg_double(sdb, "dcache_power_cc2", "total power usage of dcache_cc2", &dcache_power_cc2, 0, NULL);
1059

    
1060
  stat_reg_double(sdb, "dcache2_power_cc2", "total power usage of dcache2_cc2", &dcache2_power_cc2, 0, NULL);
1061

    
1062
  stat_reg_double(sdb, "alu_power_cc2", "total power usage of alu_cc2", &alu_power_cc2, 0, NULL);
1063

    
1064
  stat_reg_double(sdb, "resultbus_power_cc2", "total power usage of resultbus_cc2", &resultbus_power_cc2, 0, NULL);
1065

    
1066
  stat_reg_double(sdb, "clock_power_cc2", "total power usage of clock_cc2", &clock_power_cc2, 0, NULL);
1067

    
1068
  stat_reg_formula(sdb, "avg_rename_power_cc2", "avg power usage of rename unit_cc2", "rename_power_cc2/sim_cycle", NULL);
1069

    
1070
  stat_reg_formula(sdb, "avg_bpred_power_cc2", "avg power usage of bpred unit_cc2", "bpred_power_cc2/sim_cycle", NULL);
1071

    
1072
  stat_reg_formula(sdb, "avg_window_power_cc2", "avg power usage of instruction window_cc2", "window_power_cc2/sim_cycle",  NULL);
1073

    
1074
  stat_reg_formula(sdb, "avg_lsq_power_cc2", "avg power usage of instruction lsq_cc2", "lsq_power_cc2/sim_cycle",  NULL);
1075

    
1076
  stat_reg_formula(sdb, "avg_regfile_power_cc2", "avg power usage of arch. regfile_cc2", "regfile_power_cc2/sim_cycle",  NULL);
1077

    
1078
  stat_reg_formula(sdb, "avg_icache_power_cc2", "avg power usage of icache_cc2", "icache_power_cc2/sim_cycle",  NULL);
1079

    
1080
  stat_reg_formula(sdb, "avg_dcache_power_cc2", "avg power usage of dcache_cc2", "dcache_power_cc2/sim_cycle",  NULL);
1081

    
1082
  stat_reg_formula(sdb, "avg_dcache2_power_cc2", "avg power usage of dcache2_cc2", "dcache2_power_cc2/sim_cycle",  NULL);
1083

    
1084
  stat_reg_formula(sdb, "avg_alu_power_cc2", "avg power usage of alu_cc2", "alu_power_cc2/sim_cycle",  NULL);
1085

    
1086
  stat_reg_formula(sdb, "avg_resultbus_power_cc2", "avg power usage of resultbus_cc2", "resultbus_power_cc2/sim_cycle",  NULL);
1087

    
1088
  stat_reg_formula(sdb, "avg_clock_power_cc2", "avg power usage of clock_cc2", "clock_power_cc2/sim_cycle",  NULL);
1089

    
1090
  stat_reg_formula(sdb, "fetch_stage_power_cc2", "total power usage of fetch stage_cc2", "icache_power_cc2 + bpred_power_cc2", NULL);
1091

    
1092
  stat_reg_formula(sdb, "dispatch_stage_power_cc2", "total power usage of dispatch stage_cc2", "rename_power_cc2", NULL);
1093

    
1094
  stat_reg_formula(sdb, "issue_stage_power_cc2", "total power usage of issue stage_cc2", "resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2", NULL);
1095

    
1096
  stat_reg_formula(sdb, "avg_fetch_power_cc2", "average power of fetch unit per cycle_cc2", "(icache_power_cc2 + bpred_power_cc2)/ sim_cycle", /* format */NULL);
1097

    
1098
  stat_reg_formula(sdb, "avg_dispatch_power_cc2", "average power of dispatch unit per cycle_cc2", "(rename_power_cc2)/ sim_cycle", /* format */NULL);
1099

    
1100
  stat_reg_formula(sdb, "avg_issue_power_cc2", "average power of issue unit per cycle_cc2", "(resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2)/ sim_cycle", /* format */NULL);
1101

    
1102
  stat_reg_formula(sdb, "total_power_cycle_cc2", "total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)", NULL);
1103

    
1104
  stat_reg_formula(sdb, "avg_total_power_cycle_cc2", "average total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_cycle", NULL);
1105

    
1106
  stat_reg_formula(sdb, "avg_total_power_insn_cc2", "average total power per insn_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_total_insn", NULL);
1107

    
1108
  stat_reg_double(sdb, "rename_power_cc3", "total power usage of rename unit_cc3", &rename_power_cc3, 0, NULL);
1109

    
1110
  stat_reg_double(sdb, "bpred_power_cc3", "total power usage of bpred unit_cc3", &bpred_power_cc3, 0, NULL);
1111

    
1112
  stat_reg_double(sdb, "window_power_cc3", "total power usage of instruction window_cc3", &window_power_cc3, 0, NULL);
1113

    
1114
  stat_reg_double(sdb, "lsq_power_cc3", "total power usage of lsq_cc3", &lsq_power_cc3, 0, NULL);
1115

    
1116
  stat_reg_double(sdb, "regfile_power_cc3", "total power usage of arch. regfile_cc3", &regfile_power_cc3, 0, NULL);
1117

    
1118
  stat_reg_double(sdb, "icache_power_cc3", "total power usage of icache_cc3", &icache_power_cc3, 0, NULL);
1119

    
1120
  stat_reg_double(sdb, "dcache_power_cc3", "total power usage of dcache_cc3", &dcache_power_cc3, 0, NULL);
1121

    
1122
  stat_reg_double(sdb, "dcache2_power_cc3", "total power usage of dcache2_cc3", &dcache2_power_cc3, 0, NULL);
1123

    
1124
  stat_reg_double(sdb, "alu_power_cc3", "total power usage of alu_cc3", &alu_power_cc3, 0, NULL);
1125

    
1126
  stat_reg_double(sdb, "resultbus_power_cc3", "total power usage of resultbus_cc3", &resultbus_power_cc3, 0, NULL);
1127

    
1128
  stat_reg_double(sdb, "clock_power_cc3", "total power usage of clock_cc3", &clock_power_cc3, 0, NULL);
1129

    
1130
  stat_reg_formula(sdb, "avg_rename_power_cc3", "avg power usage of rename unit_cc3", "rename_power_cc3/sim_cycle", NULL);
1131

    
1132
  stat_reg_formula(sdb, "avg_bpred_power_cc3", "avg power usage of bpred unit_cc3", "bpred_power_cc3/sim_cycle", NULL);
1133

    
1134
  stat_reg_formula(sdb, "avg_window_power_cc3", "avg power usage of instruction window_cc3", "window_power_cc3/sim_cycle",  NULL);
1135

    
1136
  stat_reg_formula(sdb, "avg_lsq_power_cc3", "avg power usage of instruction lsq_cc3", "lsq_power_cc3/sim_cycle",  NULL);
1137

    
1138
  stat_reg_formula(sdb, "avg_regfile_power_cc3", "avg power usage of arch. regfile_cc3", "regfile_power_cc3/sim_cycle",  NULL);
1139

    
1140
  stat_reg_formula(sdb, "avg_icache_power_cc3", "avg power usage of icache_cc3", "icache_power_cc3/sim_cycle",  NULL);
1141

    
1142
  stat_reg_formula(sdb, "avg_dcache_power_cc3", "avg power usage of dcache_cc3", "dcache_power_cc3/sim_cycle",  NULL);
1143

    
1144
  stat_reg_formula(sdb, "avg_dcache2_power_cc3", "avg power usage of dcache2_cc3", "dcache2_power_cc3/sim_cycle",  NULL);
1145

    
1146
  stat_reg_formula(sdb, "avg_alu_power_cc3", "avg power usage of alu_cc3", "alu_power_cc3/sim_cycle",  NULL);
1147

    
1148
  stat_reg_formula(sdb, "avg_resultbus_power_cc3", "avg power usage of resultbus_cc3", "resultbus_power_cc3/sim_cycle",  NULL);
1149

    
1150
  stat_reg_formula(sdb, "avg_clock_power_cc3", "avg power usage of clock_cc3", "clock_power_cc3/sim_cycle",  NULL);
1151

    
1152
  stat_reg_formula(sdb, "fetch_stage_power_cc3", "total power usage of fetch stage_cc3", "icache_power_cc3 + bpred_power_cc3", NULL);
1153

    
1154
  stat_reg_formula(sdb, "dispatch_stage_power_cc3", "total power usage of dispatch stage_cc3", "rename_power_cc3", NULL);
1155

    
1156
  stat_reg_formula(sdb, "issue_stage_power_cc3", "total power usage of issue stage_cc3", "resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3", NULL);
1157

    
1158
  stat_reg_formula(sdb, "avg_fetch_power_cc3", "average power of fetch unit per cycle_cc3", "(icache_power_cc3 + bpred_power_cc3)/ sim_cycle", /* format */NULL);
1159

    
1160
  stat_reg_formula(sdb, "avg_dispatch_power_cc3", "average power of dispatch unit per cycle_cc3", "(rename_power_cc3)/ sim_cycle", /* format */NULL);
1161

    
1162
  stat_reg_formula(sdb, "avg_issue_power_cc3", "average power of issue unit per cycle_cc3", "(resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3)/ sim_cycle", /* format */NULL);
1163

    
1164
  stat_reg_formula(sdb, "total_power_cycle_cc3", "total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)", NULL);
1165

    
1166
  stat_reg_formula(sdb, "avg_total_power_cycle_cc3", "average total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_cycle", NULL);
1167

    
1168
  stat_reg_formula(sdb, "avg_total_power_insn_cc3", "average total power per insn_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_total_insn", NULL);
1169

    
1170
  stat_reg_counter(sdb, "total_rename_access", "total number accesses of rename unit", &total_rename_access, 0, NULL);
1171

    
1172
  stat_reg_counter(sdb, "total_bpred_access", "total number accesses of bpred unit", &total_bpred_access, 0, NULL);
1173

    
1174
  stat_reg_counter(sdb, "total_window_access", "total number accesses of instruction window", &total_window_access, 0, NULL);
1175

    
1176
  stat_reg_counter(sdb, "total_lsq_access", "total number accesses of load/store queue", &total_lsq_access, 0, NULL);
1177

    
1178
  stat_reg_counter(sdb, "total_regfile_access", "total number accesses of arch. regfile", &total_regfile_access, 0, NULL);
1179

    
1180
  stat_reg_counter(sdb, "total_icache_access", "total number accesses of icache", &total_icache_access, 0, NULL);
1181

    
1182
  stat_reg_counter(sdb, "total_dcache_access", "total number accesses of dcache", &total_dcache_access, 0, NULL);
1183

    
1184
  stat_reg_counter(sdb, "total_dcache2_access", "total number accesses of dcache2", &total_dcache2_access, 0, NULL);
1185

    
1186
  stat_reg_counter(sdb, "total_alu_access", "total number accesses of alu", &total_alu_access, 0, NULL);
1187

    
1188
  stat_reg_counter(sdb, "total_resultbus_access", "total number accesses of resultbus", &total_resultbus_access, 0, NULL);
1189

    
1190
  stat_reg_formula(sdb, "avg_rename_access", "avg number accesses of rename unit", "total_rename_access/sim_cycle", NULL);
1191

    
1192
  stat_reg_formula(sdb, "avg_bpred_access", "avg number accesses of bpred unit", "total_bpred_access/sim_cycle", NULL);
1193

    
1194
  stat_reg_formula(sdb, "avg_window_access", "avg number accesses of instruction window", "total_window_access/sim_cycle",  NULL);
1195

    
1196
  stat_reg_formula(sdb, "avg_lsq_access", "avg number accesses of lsq", "total_lsq_access/sim_cycle",  NULL);
1197

    
1198
  stat_reg_formula(sdb, "avg_regfile_access", "avg number accesses of arch. regfile", "total_regfile_access/sim_cycle",  NULL);
1199

    
1200
  stat_reg_formula(sdb, "avg_icache_access", "avg number accesses of icache", "total_icache_access/sim_cycle",  NULL);
1201

    
1202
  stat_reg_formula(sdb, "avg_dcache_access", "avg number accesses of dcache", "total_dcache_access/sim_cycle",  NULL);
1203

    
1204
  stat_reg_formula(sdb, "avg_dcache2_access", "avg number accesses of dcache2", "total_dcache2_access/sim_cycle",  NULL);
1205

    
1206
  stat_reg_formula(sdb, "avg_alu_access", "avg number accesses of alu", "total_alu_access/sim_cycle",  NULL);
1207

    
1208
  stat_reg_formula(sdb, "avg_resultbus_access", "avg number accesses of resultbus", "total_resultbus_access/sim_cycle",  NULL);
1209

    
1210
  stat_reg_counter(sdb, "max_rename_access", "max number accesses of rename unit", &max_rename_access, 0, NULL);
1211

    
1212
  stat_reg_counter(sdb, "max_bpred_access", "max number accesses of bpred unit", &max_bpred_access, 0, NULL);
1213

    
1214
  stat_reg_counter(sdb, "max_window_access", "max number accesses of instruction window", &max_window_access, 0, NULL);
1215

    
1216
  stat_reg_counter(sdb, "max_lsq_access", "max number accesses of load/store queue", &max_lsq_access, 0, NULL);
1217

    
1218
  stat_reg_counter(sdb, "max_regfile_access", "max number accesses of arch. regfile", &max_regfile_access, 0, NULL);
1219

    
1220
  stat_reg_counter(sdb, "max_icache_access", "max number accesses of icache", &max_icache_access, 0, NULL);
1221

    
1222
  stat_reg_counter(sdb, "max_dcache_access", "max number accesses of dcache", &max_dcache_access, 0, NULL);
1223

    
1224
  stat_reg_counter(sdb, "max_dcache2_access", "max number accesses of dcache2", &max_dcache2_access, 0, NULL);
1225

    
1226
  stat_reg_counter(sdb, "max_alu_access", "max number accesses of alu", &max_alu_access, 0, NULL);
1227

    
1228
  stat_reg_counter(sdb, "max_resultbus_access", "max number accesses of resultbus", &max_resultbus_access, 0, NULL);
1229

    
1230
  stat_reg_double(sdb, "max_cycle_power_cc1", "maximum cycle power usage of cc1", &max_cycle_power_cc1, 0, NULL);
1231

    
1232
  stat_reg_double(sdb, "max_cycle_power_cc2", "maximum cycle power usage of cc2", &max_cycle_power_cc2, 0, NULL);
1233

    
1234
  stat_reg_double(sdb, "max_cycle_power_cc3", "maximum cycle power usage of cc3", &max_cycle_power_cc3, 0, NULL);
1235
total_parasitic_cc3 = onchip_parasitic_cc3 + offchip_parasitic_cc3;
1236
  stat_reg_double(sdb, "parasitic_power_cc3", "total parasitic power cc3", &total_parasitic_cc3, 0, NULL);
1237
  stat_reg_double(sdb, "onchip parasitic_power_cc3", "onchip parasitic power cc3", &onchip_parasitic_cc3, 0, NULL);
1238
  stat_reg_double(sdb, "offchip parasitic_power_cc3", "offchip parasitic power cc3", &offchip_parasitic_cc3, 0, NULL);
1239
  stat_reg_double(sdb, "min amperage", "min amperage", &min_amp, 0, NULL);
1240
  stat_reg_double(sdb, "max amperage", "max amperage", &max_amp, 0, NULL);
1241
  stat_reg_double(sdb, "slow_cycles", "slow cycles", &slow_cycles, 0, NULL);
1242
  stat_reg_double(sdb, "fast_cycles", "fast cycles", &fast_cycles, 0, NULL);
1243
}
1244

    
1245

    
1246
/* This routine takes the number of rows and cols of an array structure
   and attempts to make it more of a reasonable circuit structure by
   making the number of rows and cols as close as possible (scaling both
   by factors of 2 in opposite directions).  It returns a scale factor:
   the amount that the rows should be divided by and the columns should
   be multiplied by.

   Only the rows > cols direction is handled; for rows <= cols the
   function returns 1 (no rescaling).
*/
int squarify(int rows, int cols)
{
  /* log2 of the factor that would be returned on the current iteration */
  int log2_factor = 1;

  if (rows == cols)
    return 1;

  while (rows > cols) {
    rows = rows / 2;
    cols = cols * 2;

    /* Stop once one further halving would no longer leave more rows
       than columns.  An integer shift replaces the former
       (int)pow(2.0, n): same value, no floating-point round trip. */
    if (rows / 2 <= cols)
      return 1 << log2_factor;
    log2_factor++;
  }

  return 1;
}
1282

    
1283
/* could improve squarify to work when rows < cols */
1284

    
1285
/* Variant of squarify() that also handles rows < cols and returns the
   scale factor as a double.

   Rows and cols are repeatedly scaled by 2 in opposite directions until
   they meet or cross.  The result is 2^k, where |k| is the number of
   scaling steps taken BEFORE the step on which they met or crossed:
     rows == cols  -> 1.0
     rows >  cols  -> 2^k   (k >= 0)
     rows <  cols  -> 2^-k  (k >= 0)

   NOTE(review): because the terminating step is not counted,
   squarify_new(4, 1) yields 1.0 whereas squarify(4, 1) yields 2.
   Behaviour is kept as-is here; confirm which one is intended.
*/
double squarify_new(int rows, int cols)
{
  /* Tracked directly as a power of two; every multiplication below is
     exact in double precision, so this equals pow(2.0, k). */
  double factor = 1.0;

  if (rows > cols) {
    /* Taller than wide: halve rows / double cols until they cross. */
    for (;;) {
      rows /= 2;
      cols *= 2;
      if (rows <= cols)
        break;
      factor *= 2.0;
    }
  } else if (cols > rows) {
    /* Wider than tall: double rows / halve cols until they cross. */
    for (;;) {
      rows *= 2;
      cols /= 2;
      if (cols <= rows)
        break;
      factor *= 0.5;
    }
  }

  return factor;
}
1311

    
1312
void dump_power_stats(power)
1313
     power_result_type *power;
1314
{
1315
  double total_power;
1316
  double bpred_power;
1317
  double rename_power;
1318
  double rat_power;
1319
  double dcl_power;
1320
  double lsq_power;
1321
  double window_power;
1322
  double wakeup_power;
1323
  double rs_power;
1324
  double lsq_wakeup_power;
1325
  double lsq_rs_power;
1326
  double regfile_power;
1327
  double reorder_power;
1328
  double icache_power;
1329
  double dcache_power;
1330
  double dcache2_power;
1331
  double dtlb_power;
1332
  double itlb_power;
1333
  double ambient_power = 2.0;
1334

    
1335
  icache_power = power->icache_power;
1336

    
1337
  dcache_power = power->dcache_power;
1338

    
1339
  dcache2_power = power->dcache2_power;
1340

    
1341
  itlb_power = power->itlb;
1342
  dtlb_power = power->dtlb;
1343

    
1344
  bpred_power = power->btb + power->local_predict + power->global_predict + 
1345
    power->chooser + power->ras;
1346

    
1347
  rat_power = power->rat_decoder + 
1348
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
1349

    
1350
  dcl_power = power->dcl_compare + power->dcl_pencode;
1351

    
1352
  rename_power = power->rat_power + power->dcl_power + power->inst_decoder_power;
1353

    
1354
  wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
1355
    power->wakeup_ormatch;
1356
   
1357
  rs_power = power->rs_decoder + 
1358
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
1359

    
1360
  window_power = wakeup_power + rs_power + power->selection;
1361

    
1362
  lsq_rs_power = power->lsq_rs_decoder + 
1363
    power->lsq_rs_wordline + power->lsq_rs_bitline + power->lsq_rs_senseamp;
1364

    
1365
  lsq_wakeup_power = power->lsq_wakeup_tagdrive + 
1366
    power->lsq_wakeup_tagmatch + power->lsq_wakeup_ormatch;
1367

    
1368
  lsq_power = lsq_wakeup_power + lsq_rs_power;
1369

    
1370
  reorder_power = power->reorder_decoder + 
1371
    power->reorder_wordline + power->reorder_bitline + 
1372
    power->reorder_senseamp;
1373

    
1374
  regfile_power = power->regfile_decoder + 
1375
    power->regfile_wordline + power->regfile_bitline + 
1376
    power->regfile_senseamp;
1377

    
1378
  total_power = bpred_power + rename_power + window_power + regfile_power +
1379
    power->resultbus + lsq_power + 
1380
    icache_power + dcache_power + dcache2_power + 
1381
    dtlb_power + itlb_power + power->clock_power + power->ialu_power +
1382
    power->falu_power;
1383

    
1384
  fprintf(stderr,"\nProcessor Parameters:\n");
1385
  fprintf(stderr,"Issue Width: %d\n",ruu_issue_width);
1386
  fprintf(stderr,"Window Size: %d\n",RUU_size);
1387
  fprintf(stderr,"Number of Virtual Registers: %d\n",MD_NUM_IREGS);
1388
  fprintf(stderr,"Number of Physical Registers: %d\n",RUU_size);
1389
  fprintf(stderr,"Datapath Width: %d\n",data_width);
1390

    
1391
  fprintf(stderr,"Total Power Consumption: %g\n",total_power+ambient_power);
1392
  fprintf(stderr,"Branch Predictor Power Consumption: %g  (%.3g%%)\n",bpred_power,100*bpred_power/total_power);
1393
  fprintf(stderr," branch target buffer power (W): %g\n",power->btb);
1394
  fprintf(stderr," local predict power (W): %g\n",power->local_predict);
1395
  fprintf(stderr," global predict power (W): %g\n",power->global_predict);
1396
  fprintf(stderr," chooser power (W): %g\n",power->chooser);
1397
  fprintf(stderr," RAS power (W): %g\n",power->ras);
1398
  fprintf(stderr,"Rename Logic Power Consumption: %g  (%.3g%%)\n",rename_power,100*rename_power/total_power);
1399
  fprintf(stderr," Instruction Decode Power (W): %g\n",power->inst_decoder_power);
1400
  fprintf(stderr," RAT decode_power (W): %g\n",power->rat_decoder);
1401
  fprintf(stderr," RAT wordline_power (W): %g\n",power->rat_wordline);
1402
  fprintf(stderr," RAT bitline_power (W): %g\n",power->rat_bitline);
1403
  fprintf(stderr," DCL Comparators (W): %g\n",power->dcl_compare);
1404
  fprintf(stderr,"Instruction Window Power Consumption: %g  (%.3g%%)\n",window_power,100*window_power/total_power);
1405
  fprintf(stderr," tagdrive (W): %g\n",power->wakeup_tagdrive);
1406
  fprintf(stderr," tagmatch (W): %g\n",power->wakeup_tagmatch);
1407
  fprintf(stderr," Selection Logic (W): %g\n",power->selection);
1408
  fprintf(stderr," decode_power (W): %g\n",power->rs_decoder);
1409
  fprintf(stderr," wordline_power (W): %g\n",power->rs_wordline);
1410
  fprintf(stderr," bitline_power (W): %g\n",power->rs_bitline);
1411
  fprintf(stderr,"Load/Store Queue Power Consumption: %g  (%.3g%%)\n",lsq_power,100*lsq_power/total_power);
1412
  fprintf(stderr," tagdrive (W): %g\n",power->lsq_wakeup_tagdrive);
1413
  fprintf(stderr," tagmatch (W): %g\n",power->lsq_wakeup_tagmatch);
1414
  fprintf(stderr," decode_power (W): %g\n",power->lsq_rs_decoder);
1415
  fprintf(stderr," wordline_power (W): %g\n",power->lsq_rs_wordline);
1416
  fprintf(stderr," bitline_power (W): %g\n",power->lsq_rs_bitline);
1417
  fprintf(stderr,"Arch. Register File Power Consumption: %g  (%.3g%%)\n",regfile_power,100*regfile_power/total_power);
1418
  fprintf(stderr," decode_power (W): %g\n",power->regfile_decoder);
1419
  fprintf(stderr," wordline_power (W): %g\n",power->regfile_wordline);
1420
  fprintf(stderr," bitline_power (W): %g\n",power->regfile_bitline);
1421
  fprintf(stderr,"Result Bus Power Consumption: %g  (%.3g%%)\n",power->resultbus,100*power->resultbus/total_power);
1422
  fprintf(stderr,"Total Clock Power: %g  (%.3g%%)\n",power->clock_power,100*power->clock_power/total_power);
1423
  fprintf(stderr,"Int ALU Power: %g  (%.3g%%)\n",power->ialu_power,100*power->ialu_power/total_power);
1424
  fprintf(stderr,"FP ALU Power: %g  (%.3g%%)\n",power->falu_power,100*power->falu_power/total_power);
1425
  fprintf(stderr,"Instruction Cache Power Consumption: %g  (%.3g%%)\n",icache_power,100*icache_power/total_power);
1426
  fprintf(stderr," decode_power (W): %g\n",power->icache_decoder);
1427
  fprintf(stderr," wordline_power (W): %g\n",power->icache_wordline);
1428
  fprintf(stderr," bitline_power (W): %g\n",power->icache_bitline);
1429
  fprintf(stderr," senseamp_power (W): %g\n",power->icache_senseamp);
1430
  fprintf(stderr," tagarray_power (W): %g\n",power->icache_tagarray);
1431
  fprintf(stderr,"Itlb_power (W): %g (%.3g%%)\n",power->itlb,100*power->itlb/total_power);
1432
  fprintf(stderr,"Data Cache Power Consumption: %g  (%.3g%%)\n",dcache_power,100*dcache_power/total_power);
1433
  fprintf(stderr," decode_power (W): %g\n",power->dcache_decoder);
1434
  fprintf(stderr," wordline_power (W): %g\n",power->dcache_wordline);
1435
  fprintf(stderr," bitline_power (W): %g\n",power->dcache_bitline);
1436
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache_senseamp);
1437
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache_tagarray);
1438
  fprintf(stderr,"Dtlb_power (W): %g (%.3g%%)\n",power->dtlb,100*power->dtlb/total_power);
1439
  fprintf(stderr,"Level 2 Cache Power Consumption: %g (%.3g%%)\n",dcache2_power,100*dcache2_power/total_power);
1440
  fprintf(stderr," decode_power (W): %g\n",power->dcache2_decoder);
1441
  fprintf(stderr," wordline_power (W): %g\n",power->dcache2_wordline);
1442
  fprintf(stderr," bitline_power (W): %g\n",power->dcache2_bitline);
1443
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache2_senseamp);
1444
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache2_tagarray);
1445
}
1446

    
1447
/*======================================================================*/
1448

    
1449

    
1450

    
1451
/* 
1452
 * This part of the code contains routines for each section as
1453
 * described in the tech report.  See the tech report for more details
1454
 * and explanations */
1455

    
1456
/*----------------------------------------------------------------------*/
1457

    
1458
double driver_size(double driving_cap, double desiredrisetime) {
1459
  double nsize, psize;
1460
  double Rpdrive; 
1461

    
1462
  Rpdrive = desiredrisetime/(driving_cap*log(VSINV)*-1.0);
1463
  psize = restowidth(Rpdrive,PCH);
1464
  nsize = restowidth(Rpdrive,NCH);
1465
  if (psize > Wworddrivemax) {
1466
    psize = Wworddrivemax;
1467
  }
1468
  if (psize < 4.0 * LSCALE)
1469
    psize = 4.0 * LSCALE;
1470

    
1471
  return (psize);
1472

    
1473
}
1474

    
1475
/* Decoder delay:  (see section 6.1 of tech report) */
1476

    
1477
double array_decoder_power(rows,cols,predeclength,rports,wports,cache)
1478
     int rows,cols;
1479
     double predeclength;
1480
     int rports,wports;
1481
     int cache;
1482
{
1483
  double Ctotal=0;
1484
  double Ceq=0;
1485
  int numstack;
1486
  int decode_bits=0;
1487
  int ports;
1488
  double rowsb;
1489

    
1490
  /* read and write ports are the same here */
1491
  ports = rports + wports;
1492

    
1493
  rowsb = (double)rows;
1494

    
1495
  /* number of input bits to be decoded */
1496
  decode_bits=ceil((logtwo(rowsb)));
1497

    
1498
  /* First stage: driving the decoders */
1499

    
1500
  /* This is the capacitance for driving one bit (and its complement).
1501
     -There are #rowsb 3->8 decoders contributing gatecap.
1502
     - 2.0 factor from 2 identical sets of drivers in parallel
1503
  */
1504
  Ceq = 2.0*(draincap(Wdecdrivep,PCH,1)+draincap(Wdecdriven,NCH,1)) +
1505
    gatecap(Wdec3to8n+Wdec3to8p,10.0)*rowsb;
1506

    
1507
  /* There are ports * #decode_bits total */
1508
  Ctotal+=ports*decode_bits*Ceq;
1509

    
1510
  if(verbose)
1511
    fprintf(stderr,"Decoder -- Driving decoders            == %g\n",.3*Ctotal*Powerfactor);
1512

    
1513
  /* second stage: driving a bunch of nor gates with a nand 
1514
     numstack is the size of the nor gates -- ie. a 7-128 decoder has
1515
     3-input NAND followed by 3-input NOR  */
1516

    
1517
  numstack = ceil((1.0/3.0)*logtwo(rows));
1518

    
1519
  if (numstack<=0) numstack = 1;
1520
  if (numstack>5) numstack = 5;
1521

    
1522
  /* There are #rowsb NOR gates being driven*/
1523
  Ceq = (3.0*draincap(Wdec3to8p,PCH,1) +draincap(Wdec3to8n,NCH,3) +
1524
         gatecap(WdecNORn+WdecNORp,((numstack*40)+20.0)))*rowsb;
1525

    
1526
  Ctotal+=ports*Ceq;
1527

    
1528
  if(verbose)
1529
    fprintf(stderr,"Decoder -- Driving nor w/ nand         == %g\n",.3*ports*Ceq*Powerfactor);
1530

    
1531
  /* Final stage: driving an inverter with the nor 
1532
     (inverter preceding wordline driver) -- wordline driver is in the next section*/
1533

    
1534
  Ceq = (gatecap(Wdecinvn+Wdecinvp,20.0)+
1535
         numstack*draincap(WdecNORn,NCH,1)+
1536
         draincap(WdecNORp,PCH,numstack));
1537

    
1538
  if(verbose)
1539
    fprintf(stderr,"Decoder -- Driving inverter w/ nor     == %g\n",.3*ports*Ceq*Powerfactor);
1540

    
1541
  Ctotal+=ports*Ceq;
1542

    
1543
  /* assume Activity Factor == .3  */
1544

    
1545
  return(.3*Ctotal*Powerfactor);
1546
}
1547

    
1548
/*
 * Thin wrapper around array_decoder_power(): forwards rows, cols, the
 * port counts and the cache flag unchanged and supplies 0.0 as the
 * predecode length.  Returns whatever array_decoder_power() returns.
 */
double simple_array_decoder_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  /* kept as a double object so the argument is passed as a double */
  const double zero_predeclength = 0.0;

  return array_decoder_power(rows, cols, zero_predeclength,
                             rports, wports, cache);
}
1556

    
1557

    
1558
double array_wordline_power(rows,cols,wordlinelength,rports,wports,cache)
1559
     int rows,cols;
1560
     double wordlinelength;
1561
     int rports,wports;
1562
     int cache;
1563
{
1564
  double Ctotal=0;
1565
  double Ceq=0;
1566
  double Cline=0;
1567
  double Cliner, Clinew=0;
1568
  double desiredrisetime,psize,nsize;
1569
  int ports;
1570
  double colsb;
1571

    
1572
  ports = rports+wports;
1573

    
1574
  colsb = (double)cols;
1575

    
1576
  /* Calculate size of wordline drivers assuming rise time == Period / 8 
1577
     - estimate cap on line 
1578
     - compute min resistance to achieve this with RC 
1579
     - compute width needed to achieve this resistance */
1580

    
1581
  desiredrisetime = Period/16;
1582
  Cline = (gatecappass(Wmemcellr,1.0))*colsb + wordlinelength*CM3metal;
1583
  psize = driver_size(Cline,desiredrisetime);
1584
  
1585
  /* how do we want to do p-n ratioing? -- here we just assume the same ratio 
1586
     from an inverter pair  */
1587
  nsize = psize * Wdecinvn/Wdecinvp; 
1588
  
1589
  if(verbose)
1590
    fprintf(stderr,"Wordline Driver Sizes -- nsize == %f, psize == %f\n",nsize,psize);
1591

    
1592
  Ceq = draincap(Wdecinvn,NCH,1) + draincap(Wdecinvp,PCH,1) +
1593
    gatecap(nsize+psize,20.0);
1594

    
1595
  Ctotal+=ports*Ceq;
1596

    
1597
  if(verbose)
1598
    fprintf(stderr,"Wordline -- Inverter -> Driver         == %g\n",ports*Ceq*Powerfactor);
1599

    
1600
  /* Compute caps of read wordline and write wordlines 
1601
     - wordline driver caps, given computed width from above
1602
     - read wordlines have 1 nmos access tx, size ~4
1603
     - write wordlines have 2 nmos access tx, size ~2
1604
     - metal line cap
1605
  */
1606

    
1607
  Cliner = (gatecappass(Wmemcellr,(BitWidth-2*Wmemcellr)/2.0))*colsb+
1608
    wordlinelength*CM3metal+
1609
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1610
  Clinew = (2.0*gatecappass(Wmemcellw,(BitWidth-2*Wmemcellw)/2.0))*colsb+
1611
    wordlinelength*CM3metal+
1612
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1613

    
1614
  if(verbose) {
1615
    fprintf(stderr,"Wordline -- Line                       == %g\n",1e12*Cline);
1616
    fprintf(stderr,"Wordline -- Line -- access -- gatecap  == %g\n",1e12*colsb*2*gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0));
1617
    fprintf(stderr,"Wordline -- Line -- driver -- draincap == %g\n",1e12*draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1618
    fprintf(stderr,"Wordline -- Line -- metal              == %g\n",1e12*wordlinelength*CM3metal);
1619
  }
1620
  Ctotal+=rports*Cliner+wports*Clinew;
1621

    
1622
  /* AF == 1 assuming a different wordline is charged each cycle, but only
1623
     1 wordline (per port) is actually used */
1624

    
1625
  return(Ctotal*Powerfactor);
1626
}
1627

    
1628
double simple_array_wordline_power(rows,cols,rports,wports,cache)
1629
     int rows,cols;
1630
     int rports,wports;
1631
     int cache;
1632
{
1633
  double wordlinelength;
1634
  int ports = rports + wports;
1635
  wordlinelength = cols *  (RegCellWidth + 2 * ports * BitlineSpacing);
1636
  return(array_wordline_power(rows,cols,wordlinelength,rports,wports,cache));
1637
}
1638

    
1639

    
1640
double array_bitline_power(rows,cols,bitlinelength,rports,wports,cache)
1641
     int rows,cols;
1642
     double bitlinelength;
1643
     int rports,wports;
1644
     int cache;
1645
{
1646
  double Ctotal=0;
1647
  double Ccolmux=0;
1648
  double Cbitrowr=0;
1649
  double Cbitroww=0;
1650
  double Cprerow=0;
1651
  double Cwritebitdrive=0;
1652
  double Cpregate=0;
1653
  double Cliner=0;
1654
  double Clinew=0;
1655
  int ports;
1656
  double rowsb;
1657
  double colsb;
1658

    
1659
  double desiredrisetime, Cline, psize, nsize;
1660

    
1661
  ports = rports + wports;
1662

    
1663
  rowsb = (double)rows;
1664
  colsb = (double)cols;
1665

    
1666
  /* Draincaps of access tx's */
1667

    
1668
  Cbitrowr = draincap(Wmemcellr,NCH,1);
1669
  Cbitroww = draincap(Wmemcellw,NCH,1);
1670

    
1671
  /* Cprerow -- precharge cap on the bitline
1672
     -simple scheme to estimate size of pre-charge tx's in a similar fashion
1673
      to wordline driver size estimation.
1674
     -FIXME: it would be better to use precharge/keeper pairs, i've omitted this
1675
      from this version because it couldn't autosize as easily.
1676
  */
1677

    
1678
  desiredrisetime = Period/8;
1679

    
1680
  Cline = rowsb*Cbitrowr+CM2metal*bitlinelength;
1681
  psize = driver_size(Cline,desiredrisetime);
1682

    
1683
  /* compensate for not having an nmos pre-charging */
1684
  psize = psize + psize * Wdecinvn/Wdecinvp; 
1685

    
1686
  if(verbose)
1687
    printf("Cprerow auto   == %g (psize == %g)\n",draincap(psize,PCH,1),psize);
1688

    
1689
  Cprerow = draincap(psize,PCH,1);
1690

    
1691
  /* Cpregate -- cap due to gatecap of precharge transistors -- tack this
1692
     onto bitline cap, again this could have a keeper */
1693
  Cpregate = 4.0*gatecap(psize,10.0);
1694
  global_clockcap+=rports*cols*2.0*Cpregate;
1695

    
1696
  /* Cwritebitdrive -- write bitline drivers are used instead of the precharge
1697
     stuff for write bitlines
1698
     - 2 inverter drivers within each driver pair */
1699

    
1700
  Cline = rowsb*Cbitroww+CM2metal*bitlinelength;
1701

    
1702
  psize = driver_size(Cline,desiredrisetime);
1703
  nsize = psize * Wdecinvn/Wdecinvp; 
1704

    
1705
  Cwritebitdrive = 2.0*(draincap(psize,PCH,1)+draincap(nsize,NCH,1));
1706

    
1707
  /* 
1708
     reg files (cache==0) 
1709
     => single ended bitlines (1 bitline/col)
1710
     => AFs from pop_count
1711
     caches (cache ==1)
1712
     => double-ended bitlines (2 bitlines/col)
1713
     => AFs = .5 (since one of the two bitlines is always charging/discharging)
1714
  */
1715

    
1716
#ifdef STATIC_AF
1717
  if (cache == 0) {
1718
    /* compute the total line cap for read/write bitlines */
1719
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1720
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1721

    
1722
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1723
       in cache styles) */
1724
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1725
    Ctotal+=(1.0-POPCOUNT_AF)*rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1726
    Ctotal+=.3*wports*cols*(Clinew+Cwritebitdrive);
1727
  } 
1728
  else { 
1729
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1730
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1731
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1732
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1733
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1734
  }
1735
#else
1736
  if (cache == 0) {
1737
    /* compute the total line cap for read/write bitlines */
1738
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1739
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1740

    
1741
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1742
       in cache styles) */
1743
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1744
    Ctotal += rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1745
    Ctotal += .3*wports*cols*(Clinew+Cwritebitdrive);
1746
  } 
1747
  else { 
1748
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1749
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1750
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1751
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1752
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1753
  }
1754
#endif
1755

    
1756
  if(verbose) {
1757
    fprintf(stderr,"Bitline -- Precharge                   == %g\n",1e12*Cpregate);
1758
    fprintf(stderr,"Bitline -- Line                        == %g\n",1e12*(Cliner+Clinew));
1759
    fprintf(stderr,"Bitline -- Line -- access draincap     == %g\n",1e12*rowsb*Cbitrowr);
1760
    fprintf(stderr,"Bitline -- Line -- precharge draincap  == %g\n",1e12*Cprerow);
1761
    fprintf(stderr,"Bitline -- Line -- metal               == %g\n",1e12*bitlinelength*CM2metal);
1762
    fprintf(stderr,"Bitline -- Colmux                      == %g\n",1e12*Ccolmux);
1763

    
1764
    fprintf(stderr,"\n");
1765
  }
1766

    
1767

    
1768
  if(cache==0)
1769
    return(Ctotal*Powerfactor);
1770
  else
1771
    return(Ctotal*SensePowerfactor*.4);
1772
  
1773
}
1774

    
1775

    
1776
double simple_array_bitline_power(rows,cols,rports,wports,cache)
1777
     int rows,cols;
1778
     int rports,wports;
1779
     int cache;
1780
{
1781
  double bitlinelength;
1782

    
1783
  int ports = rports + wports;
1784

    
1785
  bitlinelength = rows * (RegCellHeight + ports * WordlineSpacing);
1786

    
1787
  return (array_bitline_power(rows,cols,bitlinelength,rports,wports,cache));
1788

    
1789
}
1790

    
1791
/* estimate senseamp power dissipation in cache structures (Zyuban's method) */
1792
double senseamp_power(int cols)
1793
{
1794
  return((double)cols * Vdd/8 * .5e-3);
1795
}
1796

    
1797
/* estimate comparator power consumption (this comparator is similar
   to the tag-match structure in a CAM) */
1799
double compare_cap(int compare_bits)
1800
{
1801
  double c1, c2;
1802
  /* bottom part of comparator */
1803
  c2 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2))+
1804
    draincap(Wevalinvp,PCH,1) + draincap(Wevalinvn,NCH,1);
1805

    
1806
  /* top part of comparator */
1807
  c1 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2)+
1808
                       draincap(Wcomppreequ,NCH,1)) +
1809
    gatecap(WdecNORn,1.0)+
1810
    gatecap(WdecNORp,3.0);
1811

    
1812
  return(c1 + c2);
1813
}
1814

    
1815
/* power of dependency check logic */
1816
double dcl_compare_power(int compare_bits)
1817
{
1818
  double Ctotal;
1819
  int num_comparators;
1820
  
1821
  num_comparators = (ruu_decode_width - 1) * (ruu_decode_width);
1822

    
1823
  Ctotal = num_comparators * compare_cap(compare_bits);
1824

    
1825
  return(Ctotal*Powerfactor*AF);
1826
}
1827

    
1828
/*
 * Total power of a simple array: decoder + wordline + bitline power,
 * plus sense-amplifier power when `cache` is non-zero.
 */
double simple_array_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  double array_total;

  /* the three components common to both array styles */
  array_total = simple_array_decoder_power(rows,cols,rports,wports,cache)+
                simple_array_wordline_power(rows,cols,rports,wports,cache)+
                simple_array_bitline_power(rows,cols,rports,wports,cache);

  /* cache-style arrays additionally pay for their sense amps */
  if (cache != 0)
    array_total = array_total + senseamp_power(cols);

  return array_total;
}
1843

    
1844

    
1845
double cam_tagdrive(rows,cols,rports,wports)
1846
     int rows,cols,rports,wports;
1847
{
1848
  double Ctotal, Ctlcap, Cblcap, Cwlcap;
1849
  double taglinelength;
1850
  double wordlinelength;
1851
  double nsize, psize;
1852
  int ports;
1853
  Ctotal=0;
1854

    
1855
  ports = rports + wports;
1856

    
1857
  taglinelength = rows * 
1858
    (CamCellHeight + ports * MatchlineSpacing);
1859

    
1860
  wordlinelength = cols * 
1861
    (CamCellWidth + ports * TaglineSpacing);
1862

    
1863
  /* Compute tagline cap */
1864
  Ctlcap = Cmetal * taglinelength + 
1865
    rows * gatecappass(Wcomparen2,2.0) +
1866
    draincap(Wcompdrivern,NCH,1)+draincap(Wcompdriverp,PCH,1);
1867

    
1868
  /* Compute bitline cap (for writing new tags) */
1869
  Cblcap = Cmetal * taglinelength +
1870
    rows * draincap(Wmemcellr,NCH,2);
1871

    
1872
  /* autosize wordline driver */
1873
  psize = driver_size(Cmetal * wordlinelength + 2 * cols * gatecap(Wmemcellr,2.0),Period/8);
1874
  nsize = psize * Wdecinvn/Wdecinvp; 
1875

    
1876
  /* Compute wordline cap (for writing new tags) */
1877
  Cwlcap = Cmetal * wordlinelength + 
1878
    draincap(nsize,NCH,1)+draincap(psize,PCH,1) +
1879
    2 * cols * gatecap(Wmemcellr,2.0);
1880
    
1881
  Ctotal += (rports * cols * 2 * Ctlcap) + 
1882
    (wports * ((cols * 2 * Cblcap) + (rows * Cwlcap)));
1883

    
1884
  return(Ctotal*Powerfactor*AF);
1885
}
1886

    
1887
double cam_tagmatch(rows,cols,rports,wports)
1888
     int rows,cols,rports,wports;
1889
{
1890
  double Ctotal, Cmlcap;
1891
  double matchlinelength;
1892
  int ports;
1893
  Ctotal=0;
1894

    
1895
  ports = rports + wports;
1896

    
1897
  matchlinelength = cols * 
1898
    (CamCellWidth + ports * TaglineSpacing);
1899

    
1900
  Cmlcap = 2 * cols * draincap(Wcomparen1,NCH,2) + 
1901
    Cmetal * matchlinelength + draincap(Wmatchpchg,NCH,1) +
1902
    gatecap(Wmatchinvn+Wmatchinvp,10.0) +
1903
    gatecap(Wmatchnandn+Wmatchnandp,10.0);
1904

    
1905
  Ctotal += rports * rows * Cmlcap;
1906

    
1907
  global_clockcap += rports * rows * gatecap(Wmatchpchg,5.0);
1908
  
1909
  /* noring the nanded match lines */
1910
  if(ruu_issue_width >= 8)
1911
    Ctotal += 2 * gatecap(Wmatchnorn+Wmatchnorp,10.0);
1912

    
1913
  return(Ctotal*Powerfactor*AF);
1914
}
1915

    
1916
/*
 * Total CAM power: the sum of cam_tagdrive() and cam_tagmatch() for the
 * same geometry and port counts.
 */
double cam_array(rows,cols,rports,wports)
     int rows,cols,rports,wports;
{
  double total;

  total = cam_tagdrive(rows,cols,rports,wports) +
          cam_tagmatch(rows,cols,rports,wports);
  return total;
}
1922

    
1923

    
1924
double selection_power(int win_entries)
1925
{
1926
  double Ctotal, Cor, Cpencode;
1927
  int num_arbiter=1;
1928

    
1929
  Ctotal=0;
1930

    
1931
  while(win_entries > 4)
1932
    {
1933
      win_entries = (int)ceil((double)win_entries / 4.0);
1934
      num_arbiter += win_entries;
1935
    }
1936

    
1937
  Cor = 4 * draincap(WSelORn,NCH,1) + draincap(WSelORprequ,PCH,1);
1938

    
1939
  Cpencode = draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,1) + 
1940
    2*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,2) + 
1941
    3*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,3) + 
1942
    4*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,4) + 
1943
    4*gatecap(WSelEnn+WSelEnp,20.0) + 
1944
    4*draincap(WSelEnn,NCH,1) + 4*draincap(WSelEnp,PCH,1);
1945

    
1946
  Ctotal += ruu_issue_width * num_arbiter*(Cor+Cpencode);
1947

    
1948
  return(Ctotal*Powerfactor*AF);
1949
}
1950

    
1951
/* very rough clock power estimates */
1952
double total_clockpower(double die_length)
1953
{
1954

    
1955
  double clocklinelength;
1956
  double Cline,Cline2,Ctotal;
1957
  double pipereg_clockcap=0;
1958
  double global_buffercap = 0;
1959
  double Clockpower;
1960

    
1961
  double num_piperegs;
1962

    
1963
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
1964

    
1965
  /* Assume say 8 stages (kinda low now).
1966
     FIXME: this could be a lot better; user could input
1967
     number of pipestages, etc  */
1968

    
1969
  /* assume 8 pipe stages and try to estimate bits per pipe stage */
1970
  /* pipe stage 0/1 */
1971
  num_piperegs = ruu_issue_width*inst_length + data_width;
1972
  /* pipe stage 1/2 */
1973
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1974
  /* pipe stage 2/3 */
1975
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1976
  /* pipe stage 3/4 */
1977
  num_piperegs += ruu_issue_width*(3 * npreg_width + pow2(opcode_length));
1978
  /* pipe stage 4/5 */
1979
  num_piperegs += ruu_issue_width*(2*data_width + pow2(opcode_length));
1980
  /* pipe stage 5/6 */
1981
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1982
  /* pipe stage 6/7 */
1983
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1984
  /* pipe stage 7/8 */
1985
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1986

    
1987
  /* assume 50% extra in control signals (rule of thumb) */
1988
  num_piperegs = num_piperegs * 1.5;
1989

    
1990
  pipereg_clockcap = num_piperegs * 4*gatecap(10.0,0);
1991

    
1992
  /* estimate based on 3% of die being in clock metal */
1993
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
1994

    
1995
  /* another estimate */
1996
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
1997
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
1998
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
1999
  /* global_clockcap is computed within each array structure for pre-charge tx's*/
2000
  Ctotal = Cline+global_clockcap+pipereg_clockcap+global_buffercap;
2001

    
2002
  if(verbose)
2003
    fprintf(stderr,"num_piperegs == %f\n",num_piperegs);
2004

    
2005
  /* add I_ADD Clockcap and F_ADD Clockcap */
2006
  Clockpower = Ctotal*Powerfactor + res_ialu*I_ADD_CLOCK + res_fpalu*F_ADD_CLOCK;
2007

    
2008
  if(verbose) {
2009
    fprintf(stderr,"Global Clock Power: %g\n",Clockpower);
2010
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
2011
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
2012
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
2013
    fprintf(stderr," Global Clock Cap (Explicit) (W): %g\n",global_clockcap*Powerfactor+I_ADD_CLOCK+F_ADD_CLOCK);
2014
    fprintf(stderr," Global Clock Cap (Implicit) (W): %g\n",pipereg_clockcap*Powerfactor);
2015
  }
2016
  return(Clockpower);
2017

    
2018
}
2019

    
2020
/* very rough global clock power estimates */
2021
double global_clockpower(double die_length)
2022
{
2023

    
2024
  double clocklinelength;
2025
  double Cline,Cline2,Ctotal;
2026
  double global_buffercap = 0;
2027

    
2028
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
2029

    
2030
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
2031
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
2032
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
2033
  Ctotal = Cline+global_buffercap;
2034

    
2035
  if(verbose) {
2036
    fprintf(stderr,"Global Clock Power: %g\n",Ctotal*Powerfactor);
2037
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
2038
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
2039
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
2040
  }
2041

    
2042
  return(Ctotal*Powerfactor);
2043

    
2044
}
2045

    
2046

    
2047
double compute_resultbus_power()
2048
{
2049
  double Ctotal, Cline;
2050

    
2051
  double regfile_height;
2052

    
2053
  /* compute size of result bus tags */
2054
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2055

    
2056
  Ctotal=0;
2057

    
2058
  regfile_height = RUU_size * (RegCellHeight + 
2059
                               WordlineSpacing * 3 * ruu_issue_width); 
2060

    
2061
  /* assume num alu's == ialu  (FIXME: generate a more detailed result bus network model*/
2062
  Cline = Cmetal * (regfile_height + .5 * res_ialu * 3200.0 * LSCALE);
2063

    
2064
  /* or use result bus length measured from 21264 die photo */
2065
  /*  Cline = Cmetal * 3.3*1000;*/
2066

    
2067
  /* Assume ruu_issue_width result busses -- power can be scaled linearly
2068
     for number of result busses (scale by writeback_access) */
2069
  Ctotal += 2.0 * (data_width + npreg_width) * (ruu_issue_width)* Cline;
2070

    
2071
#ifdef STATIC_AF
2072
  return(Ctotal*Powerfactor*AF);
2073
#else
2074
  return(Ctotal*Powerfactor);
2075
#endif
2076
  
2077
}
2078

    
2079
void calculate_power(power)
2080
     power_result_type *power;
2081
{
2082
  double clockpower;
2083
  double predeclength, wordlinelength, bitlinelength;
2084
  int ndwl, ndbl, nspd, ntwl, ntbl, ntspd, c,b,a,cache, rowsb, colsb;
2085
  int trowsb, tcolsb, tagsize;
2086
  int va_size = 48;
2087

    
2088
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2089

    
2090
  /* these variables are needed to use Cacti to auto-size cache arrays 
2091
     (for optimal delay) */
2092
  time_result_type time_result;
2093
  time_parameter_type time_parameters;
2094

    
2095
  /* used to autosize other structures, like bpred tables */
2096
  int scale_factor;
2097

    
2098
  global_clockcap = 0;
2099

    
2100
  cache=0;
2101

    
2102

    
2103
  /* FIXME: ALU power is a simple constant, it would be better
2104
     to include bit AFs and have different numbers for different
2105
     types of operations */
2106
  power->ialu_power = res_ialu * I_ADD;
2107
  power->falu_power = res_fpalu * F_ADD;
2108

    
2109
  nvreg_width = (int)ceil(logtwo((double)MD_NUM_IREGS));
2110
  npreg_width = (int)ceil(logtwo((double)RUU_size));
2111

    
2112

    
2113
  /* RAT has shadow bits stored in each cell, this makes the
2114
     cell size larger than normal array structures, so we must
2115
     compute it here */
2116

    
2117
  predeclength = MD_NUM_IREGS * 
2118
    (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2119

    
2120
  wordlinelength = npreg_width * 
2121
    (RatCellWidth + 
2122
     6 * ruu_decode_width * BitlineSpacing + 
2123
     RatShiftRegWidth*RatNumShift);
2124

    
2125
  bitlinelength = MD_NUM_IREGS * (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2126

    
2127
  if(verbose)
2128
    fprintf(stderr,"rat power stats\n");
2129
  power->rat_decoder = array_decoder_power(MD_NUM_IREGS,npreg_width,predeclength,2*ruu_decode_width,ruu_decode_width,cache);
2130
  power->rat_wordline = array_wordline_power(MD_NUM_IREGS,npreg_width,wordlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2131
  power->rat_bitline = array_bitline_power(MD_NUM_IREGS,npreg_width,bitlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2132
  power->rat_senseamp = 0;
2133

    
2134
  power->dcl_compare = dcl_compare_power(nvreg_width);
2135
  power->dcl_pencode = 0;
2136
  power->inst_decoder_power = ruu_decode_width * simple_array_decoder_power(opcode_length,1,1,1,cache);
2137
  power->wakeup_tagdrive =cam_tagdrive(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2138
  power->wakeup_tagmatch =cam_tagmatch(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2139
  power->wakeup_ormatch =0; 
2140

    
2141
  power->selection = selection_power(RUU_size);
2142

    
2143

    
2144
  predeclength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2145

    
2146
  wordlinelength = data_width * 
2147
    (RegCellWidth + 
2148
     6 * ruu_issue_width * BitlineSpacing);
2149

    
2150
  bitlinelength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2151

    
2152
  if(verbose)
2153
    fprintf(stderr,"regfile power stats\n");
2154

    
2155
  power->regfile_decoder = array_decoder_power(MD_NUM_IREGS,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2156
  power->regfile_wordline = array_wordline_power(MD_NUM_IREGS,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2157
  power->regfile_bitline = array_bitline_power(MD_NUM_IREGS,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2158
  power->regfile_senseamp =0;
2159

    
2160
  predeclength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2161

    
2162
  wordlinelength = data_width * 
2163
    (RegCellWidth + 
2164
     6 * ruu_issue_width * BitlineSpacing);
2165

    
2166
  bitlinelength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2167

    
2168
  if(verbose)
2169
    fprintf(stderr,"res station power stats\n");
2170
  power->rs_decoder = array_decoder_power(RUU_size,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2171
  power->rs_wordline = array_wordline_power(RUU_size,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2172
  power->rs_bitline = array_bitline_power(RUU_size,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2173
  /* no senseamps in reg file structures (only caches) */
2174
  power->rs_senseamp =0;
2175

    
2176
  /* addresses go into lsq tag's */
2177
  power->lsq_wakeup_tagdrive =cam_tagdrive(LSQ_size,data_width,res_memport,res_memport);
2178
  power->lsq_wakeup_tagmatch =cam_tagmatch(LSQ_size,data_width,res_memport,res_memport);
2179
  power->lsq_wakeup_ormatch =0; 
2180

    
2181
  wordlinelength = data_width * 
2182
    (RegCellWidth + 
2183
     4 * res_memport * BitlineSpacing);
2184

    
2185
  bitlinelength = RUU_size * (RegCellHeight + 4 * res_memport * WordlineSpacing);
2186

    
2187
  /* rs's hold data */
2188
  if(verbose)
2189
    fprintf(stderr,"lsq station power stats\n");
2190
  power->lsq_rs_decoder = array_decoder_power(LSQ_size,data_width,predeclength,res_memport,res_memport,cache);
2191
  power->lsq_rs_wordline = array_wordline_power(LSQ_size,data_width,wordlinelength,res_memport,res_memport,cache);
2192
  power->lsq_rs_bitline = array_bitline_power(LSQ_size,data_width,bitlinelength,res_memport,res_memport,cache);
2193
  power->lsq_rs_senseamp =0;
2194

    
2195
  power->resultbus = compute_resultbus_power();
2196

    
2197
  /* Load cache values into what cacti is expecting */
2198
  time_parameters.cache_size = btb_config[0] * (data_width/8) * btb_config[1]; /* C */
2199
  time_parameters.block_size = (data_width/8); /* B */
2200
  time_parameters.associativity = btb_config[1]; /* A */
2201
  time_parameters.number_of_sets = btb_config[0]; /* C/(B*A) */
2202

    
2203
  /* have Cacti compute optimal cache config */
2204
  calculate_time(&time_result,&time_parameters);
2205
  output_data(&time_result,&time_parameters);
2206

    
2207
  /* extract Cacti results */
2208
  ndwl=time_result.best_Ndwl;
2209
  ndbl=time_result.best_Ndbl;
2210
  nspd=time_result.best_Nspd;
2211
  ntwl=time_result.best_Ntwl;
2212
  ntbl=time_result.best_Ntbl;
2213
  ntspd=time_result.best_Ntspd;
2214
  c = time_parameters.cache_size;
2215
  b = time_parameters.block_size;
2216
  a = time_parameters.associativity; 
2217

    
2218
  cache=1;
2219

    
2220
  /* Figure out how many rows/cols there are now */
2221
  rowsb = c/(b*a*ndbl*nspd);
2222
  colsb = 8*b*a*nspd/ndwl;
2223

    
2224
  if(verbose) {
2225
    fprintf(stderr,"%d KB %d-way btb (%d-byte block size):\n",c,a,b);
2226
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2227
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2228
  }
2229

    
2230
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2231
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2232
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2233

    
2234
  if(verbose)
2235
    fprintf(stderr,"btb power stats\n");
2236
  power->btb = ndwl*ndbl*(array_decoder_power(rowsb,colsb,predeclength,1,1,cache) + array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache) + array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache) + senseamp_power(colsb));
2237

    
2238
  cache=1;
2239

    
2240
  scale_factor = squarify(twolev_config[0],twolev_config[2]);
2241
  predeclength = (twolev_config[0] / scale_factor)* (RegCellHeight + WordlineSpacing);
2242
  wordlinelength = twolev_config[2] * scale_factor *  (RegCellWidth + BitlineSpacing);
2243
  bitlinelength = (twolev_config[0] / scale_factor) * (RegCellHeight + WordlineSpacing);
2244

    
2245
  if(verbose)
2246
    fprintf(stderr,"local predict power stats\n");
2247

    
2248
  power->local_predict = array_decoder_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,bitlinelength,1,1,cache) + senseamp_power(twolev_config[2]*scale_factor);
2249

    
2250
  scale_factor = squarify(twolev_config[1],3);
2251

    
2252
  predeclength = (twolev_config[1] / scale_factor)* (RegCellHeight + WordlineSpacing);
2253
  wordlinelength = 3 * scale_factor *  (RegCellWidth + BitlineSpacing);
2254
  bitlinelength = (twolev_config[1] / scale_factor) * (RegCellHeight + WordlineSpacing);
2255

    
2256

    
2257
  if(verbose)
2258
    fprintf(stderr,"local predict power stats\n");
2259
  power->local_predict += array_decoder_power(twolev_config[1]/scale_factor,3*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[1]/scale_factor,3*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[1]/scale_factor,3*scale_factor,bitlinelength,1,1,cache) + senseamp_power(3*scale_factor);
2260

    
2261
  if(verbose)
2262
    fprintf(stderr,"bimod_config[0] == %d\n",bimod_config[0]);
2263

    
2264
  scale_factor = squarify(bimod_config[0],2);
2265

    
2266
  predeclength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2267
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2268
  bitlinelength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2269

    
2270

    
2271
  if(verbose)
2272
    fprintf(stderr,"global predict power stats\n");
2273
  power->global_predict = array_decoder_power(bimod_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(bimod_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(bimod_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2274

    
2275
  scale_factor = squarify(comb_config[0],2);
2276

    
2277
  predeclength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2278
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2279
  bitlinelength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2280

    
2281
  if(verbose)
2282
    fprintf(stderr,"chooser predict power stats\n");
2283
  power->chooser = array_decoder_power(comb_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(comb_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(comb_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2284

    
2285
  if(verbose)
2286
    fprintf(stderr,"RAS predict power stats\n");
2287
  power->ras = simple_array_power(ras_size,data_width,1,1,0);
2288

    
2289
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2290

    
2291
  if(verbose)
2292
    fprintf(stderr,"dtlb predict power stats\n");
2293
  power->dtlb = res_memport*(cam_array(dtlb->nsets, va_size - (int)logtwo((double)dtlb->bsize),1,1) + simple_array_power(dtlb->nsets,tagsize,1,1,cache));
2294

    
2295
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2296

    
2297
  predeclength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2298
  wordlinelength = logtwo((double)itlb->bsize) * (RegCellWidth + BitlineSpacing);
2299
  bitlinelength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2300

    
2301
  if(verbose)
2302
    fprintf(stderr,"itlb predict power stats\n");
2303
  power->itlb = cam_array(itlb->nsets, va_size - (int)logtwo((double)itlb->bsize),1,1) + simple_array_power(itlb->nsets,tagsize,1,1,cache);
2304

    
2305

    
2306
  cache=1;
2307

    
2308
  time_parameters.cache_size = cache_il1->nsets * cache_il1->bsize * cache_il1->assoc; /* C */
2309
  time_parameters.block_size = cache_il1->bsize; /* B */
2310
  time_parameters.associativity = cache_il1->assoc; /* A */
2311
  time_parameters.number_of_sets = cache_il1->nsets; /* C/(B*A) */
2312

    
2313
  calculate_time(&time_result,&time_parameters);
2314
  output_data(&time_result,&time_parameters);
2315

    
2316
  ndwl=time_result.best_Ndwl;
2317
  ndbl=time_result.best_Ndbl;
2318
  nspd=time_result.best_Nspd;
2319
  ntwl=time_result.best_Ntwl;
2320
  ntbl=time_result.best_Ntbl;
2321
  ntspd=time_result.best_Ntspd;
2322

    
2323
  c = time_parameters.cache_size;
2324
  b = time_parameters.block_size;
2325
  a = time_parameters.associativity;
2326

    
2327
  rowsb = c/(b*a*ndbl*nspd);
2328
  colsb = 8*b*a*nspd/ndwl;
2329

    
2330
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2331
  trowsb = c/(b*a*ntbl*ntspd);
2332
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2333
 
2334
  if(verbose) {
2335
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2336
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2337
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2338
    fprintf(stderr,"tagsize == %d\n",tagsize);
2339
  }
2340

    
2341
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2342
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2343
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2344

    
2345
  if(verbose)
2346
    fprintf(stderr,"icache power stats\n");
2347
  power->icache_decoder = ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2348
  power->icache_wordline = ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2349
  power->icache_bitline = ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2350
  power->icache_senseamp = ndwl*ndbl*senseamp_power(colsb);
2351
  power->icache_tagarray = ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2352

    
2353
  power->icache_power = power->icache_decoder + power->icache_wordline + power->icache_bitline + power->icache_senseamp + power->icache_tagarray;
2354

    
2355
  time_parameters.cache_size = cache_dl1->nsets * cache_dl1->bsize * cache_dl1->assoc; /* C */
2356
  time_parameters.block_size = cache_dl1->bsize; /* B */
2357
  time_parameters.associativity = cache_dl1->assoc; /* A */
2358
  time_parameters.number_of_sets = cache_dl1->nsets; /* C/(B*A) */
2359

    
2360
  calculate_time(&time_result,&time_parameters);
2361
  output_data(&time_result,&time_parameters);
2362

    
2363
  ndwl=time_result.best_Ndwl;
2364
  ndbl=time_result.best_Ndbl;
2365
  nspd=time_result.best_Nspd;
2366
  ntwl=time_result.best_Ntwl;
2367
  ntbl=time_result.best_Ntbl;
2368
  ntspd=time_result.best_Ntspd;
2369
  c = time_parameters.cache_size;
2370
  b = time_parameters.block_size;
2371
  a = time_parameters.associativity; 
2372

    
2373
  cache=1;
2374

    
2375
  rowsb = c/(b*a*ndbl*nspd);
2376
  colsb = 8*b*a*nspd/ndwl;
2377

    
2378
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2379
  trowsb = c/(b*a*ntbl*ntspd);
2380
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2381

    
2382
  if(verbose) {
2383
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2384
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2385
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2386
    fprintf(stderr,"tagsize == %d\n",tagsize);
2387

    
2388
    fprintf(stderr,"\nntwl == %d, ntbl == %d, ntspd == %d\n",ntwl,ntbl,ntspd);
2389
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ntwl*ntbl,trowsb,tcolsb);
2390
  }
2391

    
2392
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2393
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2394
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2395

    
2396
  if(verbose)
2397
    fprintf(stderr,"dcache power stats\n");
2398
  power->dcache_decoder = res_memport*ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2399
  power->dcache_wordline = res_memport*ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2400
  power->dcache_bitline = res_memport*ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2401
  power->dcache_senseamp = res_memport*ndwl*ndbl*senseamp_power(colsb);
2402
  power->dcache_tagarray = res_memport*ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2403

    
2404
  power->dcache_power = power->dcache_decoder + power->dcache_wordline + power->dcache_bitline + power->dcache_senseamp + power->dcache_tagarray;
2405

    
2406
  clockpower = total_clockpower(.018);
2407
  power->clock_power = clockpower;
2408
  if(verbose) {
2409
    fprintf(stderr,"result bus power == %f\n",power->resultbus);
2410
    fprintf(stderr,"global clock power == %f\n",clockpower);
2411
  }
2412

    
2413
  time_parameters.cache_size = cache_dl2->nsets * cache_dl2->bsize * cache_dl2->assoc; /* C */
2414
  time_parameters.block_size = cache_dl2->bsize; /* B */
2415
  time_parameters.associativity = cache_dl2->assoc; /* A */
2416
  time_parameters.number_of_sets = cache_dl2->nsets; /* C/(B*A) */
2417

    
2418
  calculate_time(&time_result,&time_parameters);
2419
  output_data(&time_result,&time_parameters);
2420

    
2421
  ndwl=time_result.best_Ndwl;
2422
  ndbl=time_result.best_Ndbl;
2423
  nspd=time_result.best_Nspd;
2424
  ntwl=time_result.best_Ntwl;
2425
  ntbl=time_result.best_Ntbl;
2426
  ntspd=time_result.best_Ntspd;
2427
  c = time_parameters.cache_size;
2428
  b = time_parameters.block_size;
2429
  a = time_parameters.associativity;
2430

    
2431
  rowsb = c/(b*a*ndbl*nspd);
2432
  colsb = 8*b*a*nspd/ndwl;
2433

    
2434
  tagsize = va_size - ((int)logtwo(cache_dl2->nsets) + (int)logtwo(cache_dl2->bsize));
2435
  trowsb = c/(b*a*ntbl*ntspd);
2436
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2437

    
2438
  if(verbose) {
2439
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2440
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2441
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2442
    fprintf(stderr,"tagsize == %d\n",tagsize);
2443
  }
2444

    
2445
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2446
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2447
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2448

    
2449
  if(verbose)
2450
    fprintf(stderr,"dcache2 power stats\n");
2451
  power->dcache2_decoder = array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2452
  power->dcache2_wordline = array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2453
  power->dcache2_bitline = array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2454
  power->dcache2_senseamp = senseamp_power(colsb);
2455
  power->dcache2_tagarray = simple_array_power(trowsb,tcolsb,1,1,cache);
2456

    
2457
  power->dcache2_power = power->dcache2_decoder + power->dcache2_wordline + power->dcache2_bitline + power->dcache2_senseamp + power->dcache2_tagarray;
2458

    
2459
  power->rat_decoder *= crossover_scaling;
2460
  power->rat_wordline *= crossover_scaling;
2461
  power->rat_bitline *= crossover_scaling;
2462

    
2463
  power->dcl_compare *= crossover_scaling;
2464
  power->dcl_pencode *= crossover_scaling;
2465
  power->inst_decoder_power *= crossover_scaling;
2466
  power->wakeup_tagdrive *= crossover_scaling;
2467
  power->wakeup_tagmatch *= crossover_scaling;
2468
  power->wakeup_ormatch *= crossover_scaling;
2469

    
2470
  power->selection *= crossover_scaling;
2471

    
2472
  power->regfile_decoder *= crossover_scaling;
2473
  power->regfile_wordline *= crossover_scaling;
2474
  power->regfile_bitline *= crossover_scaling;
2475
  power->regfile_senseamp *= crossover_scaling;
2476

    
2477
  power->rs_decoder *= crossover_scaling;
2478
  power->rs_wordline *= crossover_scaling;
2479
  power->rs_bitline *= crossover_scaling;
2480
  power->rs_senseamp *= crossover_scaling;
2481

    
2482
  power->lsq_wakeup_tagdrive *= crossover_scaling;
2483
  power->lsq_wakeup_tagmatch *= crossover_scaling;
2484

    
2485
  power->lsq_rs_decoder *= crossover_scaling;
2486
  power->lsq_rs_wordline *= crossover_scaling;
2487
  power->lsq_rs_bitline *= crossover_scaling;
2488
  power->lsq_rs_senseamp *= crossover_scaling;
2489
 
2490
  power->resultbus *= crossover_scaling;
2491

    
2492
  power->btb *= crossover_scaling;
2493
  power->local_predict *= crossover_scaling;
2494
  power->global_predict *= crossover_scaling;
2495
  power->chooser *= crossover_scaling;
2496

    
2497
  power->dtlb *= crossover_scaling;
2498

    
2499
  power->itlb *= crossover_scaling;
2500

    
2501
  power->icache_decoder *= crossover_scaling;
2502
  power->icache_wordline*= crossover_scaling;
2503
  power->icache_bitline *= crossover_scaling;
2504
  power->icache_senseamp*= crossover_scaling;
2505
  power->icache_tagarray*= crossover_scaling;
2506

    
2507
  power->icache_power *= crossover_scaling;
2508

    
2509
  power->dcache_decoder *= crossover_scaling;
2510
  power->dcache_wordline *= crossover_scaling;
2511
  power->dcache_bitline *= crossover_scaling;
2512
  power->dcache_senseamp *= crossover_scaling;
2513
  power->dcache_tagarray *= crossover_scaling;
2514

    
2515
  power->dcache_power *= crossover_scaling;
2516
  
2517
  power->clock_power *= crossover_scaling;
2518

    
2519
  power->dcache2_decoder *= crossover_scaling;
2520
  power->dcache2_wordline *= crossover_scaling;
2521
  power->dcache2_bitline *= crossover_scaling;
2522
  power->dcache2_senseamp *= crossover_scaling;
2523
  power->dcache2_tagarray *= crossover_scaling;
2524

    
2525
  power->dcache2_power *= crossover_scaling;
2526

    
2527
  power->total_power = power->local_predict + power->global_predict + 
2528
    power->chooser + power->btb +
2529
    power->rat_decoder + power->rat_wordline + 
2530
    power->rat_bitline + power->rat_senseamp + 
2531
    power->dcl_compare + power->dcl_pencode + 
2532
    power->inst_decoder_power +
2533
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2534
    power->selection +
2535
    power->regfile_decoder + power->regfile_wordline + 
2536
    power->regfile_bitline + power->regfile_senseamp +  
2537
    power->rs_decoder + power->rs_wordline +
2538
    power->rs_bitline + power->rs_senseamp + 
2539
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2540
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2541
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2542
    power->resultbus +
2543
    power->clock_power +
2544
    power->icache_power + 
2545
    power->itlb + 
2546
    power->dcache_power + 
2547
    power->dtlb + 
2548
    power->dcache2_power;
2549

    
2550
  power->total_power_nodcache2 =power->local_predict + power->global_predict + 
2551
    power->chooser + power->btb +
2552
    power->rat_decoder + power->rat_wordline + 
2553
    power->rat_bitline + power->rat_senseamp + 
2554
    power->dcl_compare + power->dcl_pencode + 
2555
    power->inst_decoder_power +
2556
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2557
    power->selection +
2558
    power->regfile_decoder + power->regfile_wordline + 
2559
    power->regfile_bitline + power->regfile_senseamp +  
2560
    power->rs_decoder + power->rs_wordline +
2561
    power->rs_bitline + power->rs_senseamp + 
2562
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2563
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2564
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2565
    power->resultbus +
2566
    power->clock_power +
2567
    power->icache_power + 
2568
    power->itlb + 
2569
    power->dcache_power + 
2570
    power->dtlb + 
2571
    power->dcache2_power;
2572

    
2573
  power->bpred_power = power->btb + power->local_predict + power->global_predict + power->chooser + power->ras;
2574

    
2575
  power->rat_power = power->rat_decoder + 
2576
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
2577

    
2578
  power->dcl_power = power->dcl_compare + power->dcl_pencode;
2579

    
2580
  power->rename_power = power->rat_power + 
2581
    power->dcl_power + 
2582
    power->inst_decoder_power;
2583

    
2584
  power->wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
2585
    power->wakeup_ormatch;
2586

    
2587
  power->rs_power = power->rs_decoder + 
2588
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
2589

    
2590
  power->rs_power_nobit = power->rs_decoder + 
2591
    power->rs_wordline + power->rs_senseamp;
2592

    
2593
  power->window_power = power->wakeup_power + power->rs_power + 
2594
    power->selection;
2595

    
2596
  power->lsq_rs_power = power->lsq_rs_decoder + 
2597
    power->lsq_rs_wordline + power->lsq_rs_bitline + 
2598
    power->lsq_rs_senseamp;
2599

    
2600
  power->lsq_rs_power_nobit = power->lsq_rs_decoder + 
2601
    power->lsq_rs_wordline + power->lsq_rs_senseamp;
2602
   
2603
  power->lsq_wakeup_power = power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch;
2604

    
2605
  power->lsq_power = power->lsq_wakeup_power + power->lsq_rs_power;
2606

    
2607
  power->regfile_power = power->regfile_decoder + 
2608
    power->regfile_wordline + power->regfile_bitline + 
2609
    power->regfile_senseamp;
2610

    
2611
  power->regfile_power_nobit = power->regfile_decoder + 
2612
    power->regfile_wordline + power->regfile_senseamp;
2613

    
2614
  dump_power_stats(power);
2615

    
2616
}