Statistics
| Revision:

root / wattch / power.c @ 51

History | View | Annotate | Download (102 KB)

1
/* I included this copyright since we're using Cacti for some stuff */
2

    
3
/*------------------------------------------------------------
4
 *  Copyright 1994 Digital Equipment Corporation and Steve Wilton
5
 *                         All Rights Reserved
6
 *
7
 * Permission to use, copy, and modify this software and its documentation is
8
 * hereby granted only under the following terms and conditions.  Both the
9
 * above copyright notice and this permission notice must appear in all copies
10
 * of the software, derivative works or modified versions, and any portions
11
 * thereof, and both notices must appear in supporting documentation.
12
 *
13
 * Users of this software agree to the terms and conditions set forth herein,
14
 * and hereby grant back to Digital a non-exclusive, unrestricted, royalty-
15
 * free right and license under any changes, enhancements or extensions
16
 * made to the core functions of the software, including but not limited to
17
 * those affording compatibility with other hardware or software
18
 * environments, but excluding applications which incorporate this software.
19
 * Users further agree to use their best efforts to return to Digital any
20
 * such changes, enhancements or extensions that they make and inform Digital
21
 * of noteworthy uses of this software.  Correspondence should be provided
22
 * to Digital at:
23
 *
24
 *                       Director of Licensing
25
 *                       Western Research Laboratory
26
 *                       Digital Equipment Corporation
27
 *                       100 Hamilton Avenue
28
 *                       Palo Alto, California  94301
29
 *
30
 * This software may be distributed (but not offered for sale or transferred
31
 * for compensation) to third parties, provided such third parties agree to
32
 * abide by the terms and conditions of this notice.
33
 *
34
 * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
35
 * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
36
 * OF MERCHANTABILITY AND FITNESS.   IN NO EVENT SHALL DIGITAL EQUIPMENT
37
 * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
38
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
39
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
41
 * SOFTWARE.
42
 *------------------------------------------------------------*/
43

    
44
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "power.h"
#include "machine.h"
#include "cache.h"
#include "sim.h"
50

    
51
//#define SensePowerfactor (Mhz)*(Vdd/2)*(Vdd/2)
52
//#define Sense2Powerfactor (Mhz)*(2*.3+.1*Vdd)
53
//#define Powerfactor (Mhz)*Vdd*Vdd
54
//#define LowSwingPowerfactor (Mhz)*.2*.2
55
/* set scale for crossover (vdd->gnd) currents */
56
double crossover_scaling = 1.2;
57
/* set non-ideal turnoff percentage */
58
double turnoff_factor = 0.1;
59

    
60
#define MSCALE (LSCALE * .624 / .2250)
61

    
62
/*----------------------------------------------------------------------*/
63

    
64
/* static power model results */
65
power_result_type power;
66

    
67
/*
 * pow2 - return 2 raised to the power x.
 *
 * Computed with an integer shift instead of floating-point pow(), so the
 * result is exact and does not depend on libm rounding.
 *
 *   x < 0                      -> 0 (what truncating a fractional power gave)
 *   result does not fit an int -> INT_MAX (the old float-to-int cast was
 *                                 undefined in this case)
 */
int pow2(int x) {
  /* largest exponent whose result is still representable in a signed int */
  const int max_shift = (int)(sizeof(int) * CHAR_BIT) - 2;

  if (x < 0)
    return 0;
  if (x > max_shift)
    return INT_MAX;
  return (int)(1U << x);
}
70

    
71
double logfour(x)
72
     double x;
73
{
74
  if (x<=0) fprintf(stderr,"%e\n",x);
75
  return( (double) (log(x)/log(4.0)) );
76
}
77

    
78
/* safer pop count to validate the fast algorithm */
79
int pop_count_slow(bquad_t bits)
80
{
81
  int count = 0; 
82
  bquad_t tmpbits = bits; 
83
  while (tmpbits) { 
84
    if (tmpbits & 1) ++count; 
85
    tmpbits >>= 1; 
86
  } 
87
  return count; 
88
}
89

    
90
/* fast pop count */
91
int pop_count(bquad_t bits)
92
{
93
#define T unsigned long long
94
#define ONES ((T)(-1)) 
95
#define TWO(k) ((T)1 << (k)) 
96
#define CYCL(k) (ONES/(1 + (TWO(TWO(k))))) 
97
#define BSUM(x,k) ((x)+=(x) >> TWO(k), (x) &= CYCL(k)) 
98
  bquad_t x = bits; 
99
  x = (x & CYCL(0)) + ((x>>TWO(0)) & CYCL(0)); 
100
  x = (x & CYCL(1)) + ((x>>TWO(1)) & CYCL(1)); 
101
  BSUM(x,2); 
102
  BSUM(x,3); 
103
  BSUM(x,4); 
104
  BSUM(x,5); 
105
  return x; 
106
}
107

    
108

    
109
int opcode_length = 8, inst_length = 32;

/* machine configuration, defined elsewhere */
extern int ruu_decode_width, ruu_issue_width, ruu_commit_width;
extern int RUU_size, LSQ_size;
extern int data_width;
extern int res_ialu, res_fpalu, res_memport;

int nvreg_width, npreg_width;

extern int bimod_config[];

/* cache and TLB instances, defined elsewhere */
extern struct cache_t *cache_dl1, *cache_il1, *cache_dl2;
extern struct cache_t *dtlb, *itlb;

/* 2-level predictor config (<l1size> <l2size> <hist_size> <xor>) */
extern int twolev_config[];

/* combining predictor config (<meta_table_size>) */
extern int comb_config[];

/* return address stack (RAS) size */
extern int ras_size;

/* BTB predictor config (<num_sets> <associativity>) */
extern int btb_config[];

double global_clockcap;
147

    
148
/* running power totals without conditional clocking */
static double rename_power = 0, bpred_power = 0, window_power = 0,
              lsq_power = 0, regfile_power = 0, icache_power = 0,
              dcache_power = 0, dcache2_power = 0, alu_power = 0,
              falu_power = 0, resultbus_power = 0, clock_power = 0;

/* running power totals, conditional clocking style 1 */
static double rename_power_cc1 = 0, bpred_power_cc1 = 0, window_power_cc1 = 0,
              lsq_power_cc1 = 0, regfile_power_cc1 = 0, icache_power_cc1 = 0,
              dcache_power_cc1 = 0, dcache2_power_cc1 = 0, alu_power_cc1 = 0,
              resultbus_power_cc1 = 0, clock_power_cc1 = 0;

/* running power totals, conditional clocking style 2 */
static double rename_power_cc2 = 0, bpred_power_cc2 = 0, window_power_cc2 = 0,
              lsq_power_cc2 = 0, regfile_power_cc2 = 0, icache_power_cc2 = 0,
              dcache_power_cc2 = 0, dcache2_power_cc2 = 0, alu_power_cc2 = 0,
              resultbus_power_cc2 = 0, clock_power_cc2 = 0;

/* running power totals, conditional clocking style 3 */
static double rename_power_cc3 = 0, bpred_power_cc3 = 0, window_power_cc3 = 0,
              lsq_power_cc3 = 0, regfile_power_cc3 = 0, icache_power_cc3 = 0,
              dcache_power_cc3 = 0, dcache2_power_cc3 = 0, alu_power_cc3 = 0,
              resultbus_power_cc3 = 0, clock_power_cc3 = 0;

/* sums over all units, one per clocking style */
static double total_cycle_power;
static double total_cycle_power_cc1, total_cycle_power_cc2,
              total_cycle_power_cc3;

/* accumulated parasitic losses, one per clocking style */
static double total_parasitic_cc1 = 0.0, total_parasitic_cc2 = 0.0,
              total_parasitic_cc3 = 0.0;
205
/* resistance multiplied by current^2 for the per-cycle parasitic term */
#define PARASITIC_OHM 0.002

/* extremes of the per-cycle current seen so far */
static double max_amp = 0.00;
static double min_amp = 1000.00;

/* Off-chip loss added per cycle, looked up by current in 0.5 A steps:
   two entries per amp, entry 0 for currents below 0.5 A, and a final
   entry for 13 A and above. */
static double offchip_ploss[] = {
  0.5, 0.5,       /* up to  1 amp */
  0.5, 0.5,       /* up to  2 amp */
  0.5, 0.5,       /* up to  3 amp */
  0.6, 0.7,       /* up to  4 */
  0.8, 0.9,       /* up to  5 */
  1.0, 1.1,       /* up to  6 */
  1.2, 1.3,       /* up to  7 */
  1.5, 1.6,       /* up to  8 */
  1.8, 2.0,       /* up to  9 */
  2.2, 2.4,       /* up to 10 */
  2.6, 2.8,       /* up to 11 */
  3.0, 3.3,       /* up to 12 */
  3.6, 3.9, 4.0   /* up to 13, then 13 and above */
};
221

    
222
/* cumulative totals as of the previous cycle, and the resulting
   single-cycle deltas, one per clocking style */
static double last_single_total_cycle_power_cc1 = 0.0,
              last_single_total_cycle_power_cc2 = 0.0,
              last_single_total_cycle_power_cc3 = 0.0;
static double current_total_cycle_power_cc1, current_total_cycle_power_cc2,
              current_total_cycle_power_cc3;

/* instruction counts at the previous cycle and the per-cycle differences */
static double last_sim_num_insn = 0, last_sim_total_insn = 0;
static double diff_dispatch = 0, diff_commit = 0;

/* speed grade: 1 = full speed, 0 = slowed down */
static int speed_grade = 1, last_speed_grade = 1;

/* windowed sums of the per-cycle differences */
static double diff_dispatch_sum = 0, diff_commit_sum = 0;
static int init_count = 0;

/* #define DVFS_FIX */  /* left disabled */

#define SUM_OVER 50000 /* longer time = more power consumed */
static double hist_dispatch[SUM_OVER], hist_commit[SUM_OVER];
static int hist_idx = 0;

static double slow_cycles = 0, fast_cycles = 0;
static double last_switch_time = 0;
static double cycle_count = 0;

#define SWITCH_CYCLES 30
static int speed_delay[SWITCH_CYCLES];

/* largest single-cycle power seen so far, one per clocking style */
static double max_cycle_power_cc1 = 0.0, max_cycle_power_cc2 = 0.0,
              max_cycle_power_cc3 = 0.0;
253

    
254
/* per-unit access counters, defined elsewhere */
extern counter_t rename_access, bpred_access, window_access, lsq_access;
extern counter_t regfile_access, icache_access, dcache_access, dcache2_access;
extern counter_t alu_access, ialu_access, falu_access, resultbus_access;

/* window and LSQ sub-structure access counters */
extern counter_t window_selection_access, window_wakeup_access,
                 window_preg_access;
extern counter_t lsq_preg_access, lsq_wakeup_access;
extern counter_t lsq_store_data_access, lsq_load_data_access;

/* population-count tallies (total bits set / number of samples) */
extern counter_t window_total_pop_count_cycle, window_num_pop_count_cycle;
extern counter_t lsq_total_pop_count_cycle, lsq_num_pop_count_cycle;
extern counter_t regfile_total_pop_count_cycle, regfile_num_pop_count_cycle;
extern counter_t resultbus_total_pop_count_cycle, resultbus_num_pop_count_cycle;
283

    
284
/* accesses summed over all calls to update_power_stats() */
static counter_t total_rename_access = 0, total_bpred_access = 0,
                 total_window_access = 0, total_lsq_access = 0,
                 total_regfile_access = 0, total_icache_access = 0,
                 total_dcache_access = 0, total_dcache2_access = 0,
                 total_alu_access = 0, total_resultbus_access = 0;

/* largest access count seen in any single call */
static counter_t max_rename_access, max_bpred_access, max_window_access,
                 max_lsq_access, max_regfile_access, max_icache_access,
                 max_dcache_access, max_dcache2_access, max_alu_access,
                 max_resultbus_access;
305

    
306
void clear_access_stats()
307
{
308
  rename_access=0;
309
  bpred_access=0;
310
  window_access=0;
311
  lsq_access=0;
312
  regfile_access=0;
313
  icache_access=0;
314
  dcache_access=0;
315
  dcache2_access=0;
316
  alu_access=0;
317
  ialu_access=0;
318
  falu_access=0;
319
  resultbus_access=0;
320

    
321
  window_preg_access=0;
322
  window_selection_access=0;
323
  window_wakeup_access=0;
324
  lsq_store_data_access=0;
325
  lsq_load_data_access=0;
326
  lsq_wakeup_access=0;
327
  lsq_preg_access=0;
328

    
329
  window_total_pop_count_cycle=0;
330
  window_num_pop_count_cycle=0;
331
  lsq_total_pop_count_cycle=0;
332
  lsq_num_pop_count_cycle=0;
333
  regfile_total_pop_count_cycle=0;
334
  regfile_num_pop_count_cycle=0;
335
  resultbus_total_pop_count_cycle=0;
336
  resultbus_num_pop_count_cycle=0;
337
}
338

    
339
/* compute bitline activity factors which we use to scale bitline power 
340
   Here it is very important whether we assume 0's or 1's are
341
   responsible for dissipating power in pre-charged stuctures. (since
342
   most of the bits are 0's, we assume the design is power-efficient
343
   enough to allow 0's to _not_ discharge 
344
*/
345
double compute_af(counter_t num_pop_count_cycle,counter_t total_pop_count_cycle,int pop_width) {
346
  double avg_pop_count;
347
  double af,af_b;
348

    
349
  if(num_pop_count_cycle)
350
    avg_pop_count = (double)total_pop_count_cycle / (double)num_pop_count_cycle;
351
  else
352
    avg_pop_count = 0;
353

    
354
  af = avg_pop_count / (double)pop_width;
355
  
356
  af_b = 1.0 - af;
357

    
358
  /*  printf("af == %f%%, af_b == %f%%, total_pop == %d, num_pop == %d\n",100*af,100*af_b,total_pop_count_cycle,num_pop_count_cycle); */
359

    
360
  return(af_b);
361
}
362

    
363
/* compute power statistics on each cycle, for each conditional clocking style.  Obviously
364
most of the speed penalty comes here, so if you don't want per-cycle power estimates
365
you could post-process 
366

367
See README.wattch for details on the various clock gating styles.
368

369
*/
370
void update_power_stats()
371
{
372
  double window_af_b, lsq_af_b, regfile_af_b, resultbus_af_b;
373
  double current;
374
  int speed_idx;
375

    
376
#ifdef DYNAMIC_AF
377
  window_af_b = compute_af(window_num_pop_count_cycle,window_total_pop_count_cycle,data_width);
378
  lsq_af_b = compute_af(lsq_num_pop_count_cycle,lsq_total_pop_count_cycle,data_width);
379
  regfile_af_b = compute_af(regfile_num_pop_count_cycle,regfile_total_pop_count_cycle,data_width);
380
  resultbus_af_b = compute_af(resultbus_num_pop_count_cycle,resultbus_total_pop_count_cycle,data_width);
381
#endif
382
  
383
  rename_power+=power.rename_power;
384
  bpred_power+=power.bpred_power;
385
  window_power+=power.window_power;
386
  lsq_power+=power.lsq_power;
387
  regfile_power+=power.regfile_power;
388
  icache_power+=power.icache_power+power.itlb;
389
  dcache_power+=power.dcache_power+power.dtlb;
390
  dcache2_power+=power.dcache2_power;
391
  alu_power+=power.ialu_power + power.falu_power;
392
  falu_power+=power.falu_power;
393
  resultbus_power+=power.resultbus;
394
  clock_power+=power.clock_power;
395

    
396
  total_rename_access+=rename_access;
397
  total_bpred_access+=bpred_access;
398
  total_window_access+=window_access;
399
  total_lsq_access+=lsq_access;
400
  total_regfile_access+=regfile_access;
401
  total_icache_access+=icache_access;
402
  total_dcache_access+=dcache_access;
403
  total_dcache2_access+=dcache2_access;
404
  total_alu_access+=alu_access;
405
  total_resultbus_access+=resultbus_access;
406

    
407
  max_rename_access=MAX(rename_access,max_rename_access);
408
  max_bpred_access=MAX(bpred_access,max_bpred_access);
409
  max_window_access=MAX(window_access,max_window_access);
410
  max_lsq_access=MAX(lsq_access,max_lsq_access);
411
  max_regfile_access=MAX(regfile_access,max_regfile_access);
412
  max_icache_access=MAX(icache_access,max_icache_access);
413
  max_dcache_access=MAX(dcache_access,max_dcache_access);
414
  max_dcache2_access=MAX(dcache2_access,max_dcache2_access);
415
  max_alu_access=MAX(alu_access,max_alu_access);
416
  max_resultbus_access=MAX(resultbus_access,max_resultbus_access);
417
      
418
  if(rename_access) {
419
    rename_power_cc1+=power.rename_power;
420
    rename_power_cc2+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
421
    rename_power_cc3+=((double)rename_access/(double)ruu_decode_width)*power.rename_power;
422
  }
423
  else 
424
    rename_power_cc3+=turnoff_factor*power.rename_power;
425

    
426
  if(bpred_access) {
427
    if(bpred_access <= 2)
428
      bpred_power_cc1+=power.bpred_power;
429
    else
430
      bpred_power_cc1+=((double)bpred_access/2.0) * power.bpred_power;
431
    bpred_power_cc2+=((double)bpred_access/2.0) * power.bpred_power;
432
    bpred_power_cc3+=((double)bpred_access/2.0) * power.bpred_power;
433
  }
434
  else
435
    bpred_power_cc3+=turnoff_factor*power.bpred_power;
436

    
437
#ifdef STATIC_AF
438
  if(window_preg_access) {
439
    if(window_preg_access <= 3*ruu_issue_width)
440
      window_power_cc1+=power.rs_power;
441
    else
442
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
443
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
444
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*power.rs_power;
445
  }
446
  else
447
    window_power_cc3+=turnoff_factor*power.rs_power;
448
#elif defined(DYNAMIC_AF)
449
  if(window_preg_access) {
450
    if(window_preg_access <= 3*ruu_issue_width)
451
      window_power_cc1+=power.rs_power_nobit + window_af_b*power.rs_bitline;
452
    else
453
      window_power_cc1+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
454
    window_power_cc2+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
455
    window_power_cc3+=((double)window_preg_access/(3.0*(double)ruu_issue_width))*(power.rs_power_nobit + window_af_b*power.rs_bitline);
456
  }
457
  else
458
    window_power_cc3+=turnoff_factor*power.rs_power;
459
#else
460
  panic("no AF-style defined\n");
461
#endif
462

    
463
  if(window_selection_access) {
464
    if(window_selection_access <= ruu_issue_width)
465
      window_power_cc1+=power.selection;
466
    else
467
      window_power_cc1+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
468
    window_power_cc2+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
469
    window_power_cc3+=((double)window_selection_access/((double)ruu_issue_width))*power.selection;
470
  }
471
  else
472
    window_power_cc3+=turnoff_factor*power.selection;
473

    
474
  if(window_wakeup_access) {
475
    if(window_wakeup_access <= ruu_issue_width)
476
      window_power_cc1+=power.wakeup_power;
477
    else
478
      window_power_cc1+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
479
    window_power_cc2+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
480
    window_power_cc3+=((double)window_wakeup_access/((double)ruu_issue_width))*power.wakeup_power;
481
  }
482
  else
483
    window_power_cc3+=turnoff_factor*power.wakeup_power;
484

    
485
  if(lsq_wakeup_access) {
486
    if(lsq_wakeup_access <= res_memport)
487
      lsq_power_cc1+=power.lsq_wakeup_power;
488
    else
489
      lsq_power_cc1+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
490
    lsq_power_cc2+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
491
    lsq_power_cc3+=((double)lsq_wakeup_access/((double)res_memport))*power.lsq_wakeup_power;
492
  }
493
  else
494
    lsq_power_cc3+=turnoff_factor*power.lsq_wakeup_power;
495

    
496
#ifdef STATIC_AF
497
  if(lsq_preg_access) {
498
    if(lsq_preg_access <= res_memport)
499
      lsq_power_cc1+=power.lsq_rs_power;
500
    else
501
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
502
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
503
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*power.lsq_rs_power;
504
  }
505
  else
506
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
507
#else
508
  if(lsq_preg_access) {
509
    if(lsq_preg_access <= res_memport)
510
      lsq_power_cc1+=power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline;
511
    else
512
      lsq_power_cc1+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
513
    lsq_power_cc2+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
514
    lsq_power_cc3+=((double)lsq_preg_access/((double)res_memport))*(power.lsq_rs_power_nobit + lsq_af_b*power.lsq_rs_bitline);
515
  }
516
  else
517
    lsq_power_cc3+=turnoff_factor*power.lsq_rs_power;
518
#endif
519

    
520
#ifdef STATIC_AF
521
  if(regfile_access) {
522
    if(regfile_access <= (3.0*ruu_commit_width))
523
      regfile_power_cc1+=power.regfile_power;
524
    else
525
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
526
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
527
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*power.regfile_power;
528
  }
529
  else
530
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
531
#else
532
  if(regfile_access) {
533
    if(regfile_access <= (3.0*ruu_commit_width))
534
      regfile_power_cc1+=power.regfile_power_nobit + regfile_af_b*power.regfile_bitline;
535
    else
536
      regfile_power_cc1+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
537
    regfile_power_cc2+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
538
    regfile_power_cc3+=((double)regfile_access/(3.0*(double)ruu_commit_width))*(power.regfile_power_nobit + regfile_af_b*power.regfile_bitline);
539
  }
540
  else
541
    regfile_power_cc3+=turnoff_factor*power.regfile_power;
542
#endif
543

    
544
  if(icache_access) {
545
    /* don't scale icache because we assume 1 line is fetched, unless fetch stalls */
546
    icache_power_cc1+=power.icache_power+power.itlb;
547
    icache_power_cc2+=power.icache_power+power.itlb;
548
    icache_power_cc3+=power.icache_power+power.itlb;
549
  }
550
  else
551
    icache_power_cc3+=turnoff_factor*(power.icache_power+power.itlb);
552

    
553
  if(dcache_access) {
554
    if(dcache_access <= res_memport)
555
      dcache_power_cc1+=power.dcache_power+power.dtlb;
556
    else
557
      dcache_power_cc1+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
558
                                                     power.dtlb);
559
    dcache_power_cc2+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
560
                                                   power.dtlb);
561
    dcache_power_cc3+=((double)dcache_access/(double)res_memport)*(power.dcache_power +
562
                                                   power.dtlb);
563
  }
564
  else
565
    dcache_power_cc3+=turnoff_factor*(power.dcache_power+power.dtlb);
566

    
567
  if(dcache2_access) {
568
    if(dcache2_access <= res_memport)
569
      dcache2_power_cc1+=power.dcache2_power;
570
    else
571
      dcache2_power_cc1+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
572
    dcache2_power_cc2+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
573
    dcache2_power_cc3+=((double)dcache2_access/(double)res_memport)*power.dcache2_power;
574
  }
575
  else
576
    dcache2_power_cc3+=turnoff_factor*power.dcache2_power;
577

    
578
  if(alu_access) {
579
    if(ialu_access)
580
      alu_power_cc1+=power.ialu_power;
581
    else
582
      alu_power_cc3+=turnoff_factor*power.ialu_power;
583
    if(falu_access)
584
      alu_power_cc1+=power.falu_power;
585
    else
586
      alu_power_cc3+=turnoff_factor*power.falu_power;
587

    
588
    alu_power_cc2+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
589
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
590
    alu_power_cc3+=((double)ialu_access/(double)res_ialu)*power.ialu_power +
591
      ((double)falu_access/(double)res_fpalu)*power.falu_power;
592
  }
593
  else
594
    alu_power_cc3+=turnoff_factor*(power.ialu_power + power.falu_power);
595

    
596
#ifdef STATIC_AF
597
  if(resultbus_access) {
598
    assert(ruu_issue_width != 0);
599
    if(resultbus_access <= ruu_issue_width) {
600
      resultbus_power_cc1+=power.resultbus;
601
    }
602
    else {
603
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
604
    }
605
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
606
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*power.resultbus;
607
  }
608
  else
609
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
610
#else
611
  if(resultbus_access) {
612
    assert(ruu_issue_width != 0);
613
    if(resultbus_access <= ruu_issue_width) {
614
      resultbus_power_cc1+=resultbus_af_b*power.resultbus;
615
    }
616
    else {
617
      resultbus_power_cc1+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
618
    }
619
    resultbus_power_cc2+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
620
    resultbus_power_cc3+=((double)resultbus_access/(double)ruu_issue_width)*resultbus_af_b*power.resultbus;
621
  }
622
  else
623
    resultbus_power_cc3+=turnoff_factor*power.resultbus;
624
#endif
625

    
626
  total_cycle_power = rename_power + bpred_power + window_power + 
627
    lsq_power + regfile_power + icache_power + dcache_power +
628
    alu_power + resultbus_power;
629

    
630
  total_cycle_power_cc1 = rename_power_cc1 + bpred_power_cc1 + 
631
    window_power_cc1 + lsq_power_cc1 + regfile_power_cc1 + 
632
    icache_power_cc1 + dcache_power_cc1 + alu_power_cc1 + 
633
    resultbus_power_cc1;
634

    
635
  total_cycle_power_cc2 = rename_power_cc2 + bpred_power_cc2 + 
636
    window_power_cc2 + lsq_power_cc2 + regfile_power_cc2 + 
637
    icache_power_cc2 + dcache_power_cc2 + alu_power_cc2 + 
638
    resultbus_power_cc2;
639

    
640
  total_cycle_power_cc3 = rename_power_cc3 + bpred_power_cc3 + 
641
    window_power_cc3 + lsq_power_cc3 + regfile_power_cc3 + 
642
    icache_power_cc3 + dcache_power_cc3 + alu_power_cc3 + 
643
    resultbus_power_cc3;
644

    
645
  clock_power_cc1+=power.clock_power*(total_cycle_power_cc1/total_cycle_power);
646
  clock_power_cc2+=power.clock_power*(total_cycle_power_cc2/total_cycle_power);
647
  clock_power_cc3+=power.clock_power*(total_cycle_power_cc3/total_cycle_power);
648

    
649
  total_cycle_power_cc1 += clock_power_cc1;
650
  total_cycle_power_cc2 += clock_power_cc2;
651
  total_cycle_power_cc3 += clock_power_cc3;
652

    
653
  current_total_cycle_power_cc1 = total_cycle_power_cc1
654
    -last_single_total_cycle_power_cc1;
655
  current_total_cycle_power_cc2 = total_cycle_power_cc2
656
    -last_single_total_cycle_power_cc2;
657
  current_total_cycle_power_cc3 = total_cycle_power_cc3
658
    -last_single_total_cycle_power_cc3;
659

    
660
   current = current_total_cycle_power_cc3 / Vdd;
661

    
662
  if (max_amp < current ) {
663
      max_amp = current ;
664
  }
665

    
666
  if (min_amp > current) {
667
      min_amp = current;
668
  }
669

    
670
  if (current < 0.5) {
671
      total_parasitic_cc1 += offchip_ploss[0];
672
      total_parasitic_cc2 += offchip_ploss[0];
673
      total_parasitic_cc3 += offchip_ploss[0];
674
  } else if (current < 1) {
675
      total_parasitic_cc1 += offchip_ploss[1];
676
      total_parasitic_cc2 += offchip_ploss[1];
677
      total_parasitic_cc3 += offchip_ploss[1];
678
  } else if (current < 1.5) {
679
      total_parasitic_cc1 += offchip_ploss[2];
680
      total_parasitic_cc2 += offchip_ploss[2];
681
      total_parasitic_cc3 += offchip_ploss[2];
682
  } else if (current < 2) {
683
      total_parasitic_cc1 += offchip_ploss[3];
684
      total_parasitic_cc2 += offchip_ploss[3];
685
      total_parasitic_cc3 += offchip_ploss[3];
686
  } else if (current < 2.5) {
687
      total_parasitic_cc1 += offchip_ploss[4];
688
      total_parasitic_cc2 += offchip_ploss[4];
689
      total_parasitic_cc3 += offchip_ploss[4];
690
  } else if (current < 3) {
691
      total_parasitic_cc1 += offchip_ploss[5];
692
      total_parasitic_cc2 += offchip_ploss[5];
693
      total_parasitic_cc3 += offchip_ploss[5];
694
  } else if (current < 3.5) {
695
      total_parasitic_cc1 += offchip_ploss[6];
696
      total_parasitic_cc2 += offchip_ploss[6];
697
      total_parasitic_cc3 += offchip_ploss[6];
698
  } else if (current < 4) {
699
      total_parasitic_cc1 += offchip_ploss[7];
700
      total_parasitic_cc2 += offchip_ploss[7];
701
      total_parasitic_cc3 += offchip_ploss[7];
702
  } else if (current < 4.5) {
703
      total_parasitic_cc1 += offchip_ploss[8];
704
      total_parasitic_cc2 += offchip_ploss[8];
705
      total_parasitic_cc3 += offchip_ploss[8];
706
  } else if (current < 5) {
707
      total_parasitic_cc1 += offchip_ploss[9];
708
      total_parasitic_cc2 += offchip_ploss[9];
709
      total_parasitic_cc3 += offchip_ploss[9];
710
  } else if (current < 5.5) {
711
      total_parasitic_cc1 += offchip_ploss[10];
712
      total_parasitic_cc2 += offchip_ploss[10];
713
      total_parasitic_cc3 += offchip_ploss[10];
714
  } else if (current < 6) {
715
      total_parasitic_cc1 += offchip_ploss[11];
716
      total_parasitic_cc2 += offchip_ploss[11];
717
      total_parasitic_cc3 += offchip_ploss[11];
718
  } else if (current < 6.5) {
719
      total_parasitic_cc1 += offchip_ploss[12];
720
      total_parasitic_cc2 += offchip_ploss[12];
721
      total_parasitic_cc3 += offchip_ploss[12];
722
  } else if (current < 7) {
723
      total_parasitic_cc1 += offchip_ploss[13];
724
      total_parasitic_cc2 += offchip_ploss[13];
725
      total_parasitic_cc3 += offchip_ploss[13];
726
  } else if (current < 7.5) {
727
      total_parasitic_cc1 += offchip_ploss[14];
728
      total_parasitic_cc2 += offchip_ploss[14];
729
      total_parasitic_cc3 += offchip_ploss[14];
730
  } else if (current < 8) {
731
      total_parasitic_cc1 += offchip_ploss[15];
732
      total_parasitic_cc2 += offchip_ploss[15];
733
      total_parasitic_cc3 += offchip_ploss[15];
734
  } else if (current < 8.5) {
735
      total_parasitic_cc1 += offchip_ploss[16];
736
      total_parasitic_cc2 += offchip_ploss[16];
737
      total_parasitic_cc3 += offchip_ploss[16];
738
  } else if (current < 9) {
739
      total_parasitic_cc1 += offchip_ploss[17];
740
      total_parasitic_cc2 += offchip_ploss[17];
741
      total_parasitic_cc3 += offchip_ploss[17];
742
  } else if (current < 9.5) {
743
      total_parasitic_cc1 += offchip_ploss[18];
744
      total_parasitic_cc2 += offchip_ploss[18];
745
      total_parasitic_cc3 += offchip_ploss[18];
746
  } else if (current < 10) {
747
      total_parasitic_cc1 += offchip_ploss[19];
748
      total_parasitic_cc2 += offchip_ploss[19];
749
      total_parasitic_cc3 += offchip_ploss[19];
750
  } else if (current < 10.5) {
751
      total_parasitic_cc1 += offchip_ploss[20];
752
      total_parasitic_cc2 += offchip_ploss[20];
753
      total_parasitic_cc3 += offchip_ploss[20];
754
  } else if (current < 11) {
755
      total_parasitic_cc1 += offchip_ploss[21];
756
      total_parasitic_cc2 += offchip_ploss[21];
757
      total_parasitic_cc3 += offchip_ploss[21];
758
  } else if (current < 11.5) {
759
      total_parasitic_cc1 += offchip_ploss[22];
760
      total_parasitic_cc2 += offchip_ploss[22];
761
      total_parasitic_cc3 += offchip_ploss[22];
762
  } else if (current < 12) {
763
      total_parasitic_cc1 += offchip_ploss[23];
764
      total_parasitic_cc2 += offchip_ploss[23];
765
      total_parasitic_cc3 += offchip_ploss[23];
766
  } else if (current < 12.5) {
767
      total_parasitic_cc1 += offchip_ploss[24];
768
      total_parasitic_cc2 += offchip_ploss[24];
769
      total_parasitic_cc3 += offchip_ploss[24];
770
  } else if (current < 13) {
771
      total_parasitic_cc1 += offchip_ploss[25];
772
      total_parasitic_cc2 += offchip_ploss[25];
773
      total_parasitic_cc3 += offchip_ploss[25];
774
  } else {
775
      total_parasitic_cc1 += offchip_ploss[26];
776
      total_parasitic_cc2 += offchip_ploss[26];
777
      total_parasitic_cc3 += offchip_ploss[26];
778
  }
779

    
780
  total_parasitic_cc1 += pow(current, 2) * PARASITIC_OHM;
781
  total_parasitic_cc2 += pow(current, 2) * PARASITIC_OHM;
782
  total_parasitic_cc3 += pow(current, 2) * PARASITIC_OHM;
783

    
784
  max_cycle_power_cc1 = MAX(max_cycle_power_cc1,current_total_cycle_power_cc1);
785
  max_cycle_power_cc2 = MAX(max_cycle_power_cc2,current_total_cycle_power_cc2);
786
  max_cycle_power_cc3 = MAX(max_cycle_power_cc3,current_total_cycle_power_cc3);
787

    
788
  last_single_total_cycle_power_cc1 = total_cycle_power_cc1;
789
  last_single_total_cycle_power_cc2 = total_cycle_power_cc2;
790
  last_single_total_cycle_power_cc3 = total_cycle_power_cc3;
791

    
792
  cycle_count++;
793

    
794
  // here's where we change VFI levels
795
  diff_dispatch = sim_total_insn - last_sim_total_insn;
796
  diff_commit = sim_num_insn - last_sim_num_insn;
797
  
798
  diff_dispatch_sum += diff_dispatch;
799
  diff_commit_sum += diff_commit;
800

    
801
  hist_dispatch[hist_idx] = diff_dispatch;
802
  hist_commit[hist_idx] = diff_commit;
803
  hist_idx++;
804
  if(hist_idx >= SUM_OVER) {
805
    hist_idx = 0;
806
  }
807

    
808
  if(init_count >= SUM_OVER) {
809
      // Update speed
810
    speed_grade = speed_delay[SWITCH_CYCLES - 1];
811
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES-1; speed_idx++) {
812

    
813
        speed_delay[speed_idx+1] = speed_delay[speed_idx];
814
    }
815

    
816
    diff_dispatch_sum -= hist_dispatch[hist_idx];
817
    diff_commit_sum -= hist_commit[hist_idx];
818

    
819
    if( diff_commit_sum < diff_dispatch_sum ) {
820
        speed_delay[0] = 0;
821
    }
822
    else if( diff_commit_sum >= diff_dispatch_sum ) {
823
        speed_delay[0] = 1;
824
    }
825

    
826
    if(speed_grade == 0) {
827
        slow_cycles++;
828
    }
829
    else {
830
        fast_cycles++;
831
    }
832

    
833
  } else {
834
    init_count++;
835
    fast_cycles++;
836

    
837
    for (speed_idx = 0; speed_idx < SWITCH_CYCLES; speed_idx++) {
838
        speed_delay[speed_idx] = 1;
839
    }
840
  }
841

    
842
//  if (diff_commit <= diff_dispatch) {
843
//      speed_grade = 0;
844
//  } else if (diff_commit > diff_dispatch) {
845
//      speed_grade = 1;
846
//  }
847

    
848
  if ((speed_grade == 0) && (last_speed_grade == 1)) {
849
      Mhz = Mhz / 2;
850
      Vdd = Vdd / 2;
851
      printf("Speed down!\n");
852
      last_switch_time = cycle_count;
853
  } else if ((speed_grade == 1) && (last_speed_grade == 0)) {
854
      Mhz = Mhz * 2;
855
      Vdd = Vdd * 2;
856
      printf("Speed up!\n");
857
      last_switch_time = cycle_count;
858
  }
859
#ifdef DVFS_FIX
860
  else if (last_switch_time < cycle_count-(SUM_OVER/3) && speed_grade==0 ) {
861
      speed_grade = 1;
862
      Mhz = Mhz * 2;
863
      Vdd = Vdd * 2;
864
      init_count = 0;
865
      last_switch_time = cycle_count;
866
      hist_idx = 0;
867
      diff_commit_sum = 0;
868
      diff_dispatch_sum = 0;
869
  }
870
#endif
871
      //printf("Vdd = %f, MHz = %f\n",Vdd,Mhz);
872

    
873
  if (speed_grade != last_speed_grade) {
874
    Period = 1/Mhz;
875
    SensePowerfactor3 = Mhz * Vbitsense * Vbitsense;
876
    SensePowerfactor2 = Mhz * (Vbitpre - Vbitsense) * (Vbitpre - Vbitsense);
877
    SensePowerfactor = (Mhz) * (Vdd/2) * (Vdd/2);
878
    Powerfactor = (Mhz) * (Vdd) * (Vdd);
879
    Sense2Powerfactor = Mhz * (2 * .3 + .1 * Vdd);
880
    LowSwingPowerfactor = Mhz * .2 * .2;
881
      calculate_power(&power);
882
  }
883

    
884
  last_speed_grade = speed_grade;
885

    
886
  // Update
887
  last_sim_num_insn  = sim_num_insn;
888
  last_sim_total_insn = sim_total_insn;
889

    
890
}
891

    
892
void
893
power_reg_stats(struct stat_sdb_t *sdb)        /* stats database */
894
{
895
  stat_reg_double(sdb, "rename_power", "total power usage of rename unit", &rename_power, 0, NULL);
896

    
897
  stat_reg_double(sdb, "bpred_power", "total power usage of bpred unit", &bpred_power, 0, NULL);
898

    
899
  stat_reg_double(sdb, "window_power", "total power usage of instruction window", &window_power, 0, NULL);
900

    
901
  stat_reg_double(sdb, "lsq_power", "total power usage of load/store queue", &lsq_power, 0, NULL);
902

    
903
  stat_reg_double(sdb, "regfile_power", "total power usage of arch. regfile", &regfile_power, 0, NULL);
904

    
905
  stat_reg_double(sdb, "icache_power", "total power usage of icache", &icache_power, 0, NULL);
906

    
907
  stat_reg_double(sdb, "dcache_power", "total power usage of dcache", &dcache_power, 0, NULL);
908

    
909
  stat_reg_double(sdb, "dcache2_power", "total power usage of dcache2", &dcache2_power, 0, NULL);
910

    
911
  stat_reg_double(sdb, "alu_power", "total power usage of alu", &alu_power, 0, NULL);
912

    
913
  stat_reg_double(sdb, "falu_power", "total power usage of falu", &falu_power, 0, NULL);
914

    
915
  stat_reg_double(sdb, "resultbus_power", "total power usage of resultbus", &resultbus_power, 0, NULL);
916

    
917
  stat_reg_double(sdb, "clock_power", "total power usage of clock", &clock_power, 0, NULL);
918

    
919
  stat_reg_formula(sdb, "avg_rename_power", "avg power usage of rename unit", "rename_power/sim_cycle", NULL);
920

    
921
  stat_reg_formula(sdb, "avg_bpred_power", "avg power usage of bpred unit", "bpred_power/sim_cycle", NULL);
922

    
923
  stat_reg_formula(sdb, "avg_window_power", "avg power usage of instruction window", "window_power/sim_cycle",  NULL);
924

    
925
  stat_reg_formula(sdb, "avg_lsq_power", "avg power usage of lsq", "lsq_power/sim_cycle",  NULL);
926

    
927
  stat_reg_formula(sdb, "avg_regfile_power", "avg power usage of arch. regfile", "regfile_power/sim_cycle",  NULL);
928

    
929
  stat_reg_formula(sdb, "avg_icache_power", "avg power usage of icache", "icache_power/sim_cycle",  NULL);
930

    
931
  stat_reg_formula(sdb, "avg_dcache_power", "avg power usage of dcache", "dcache_power/sim_cycle",  NULL);
932

    
933
  stat_reg_formula(sdb, "avg_dcache2_power", "avg power usage of dcache2", "dcache2_power/sim_cycle",  NULL);
934

    
935
  stat_reg_formula(sdb, "avg_alu_power", "avg power usage of alu", "alu_power/sim_cycle",  NULL);
936

    
937
  stat_reg_formula(sdb, "avg_falu_power", "avg power usage of falu", "falu_power/sim_cycle",  NULL);
938

    
939
  stat_reg_formula(sdb, "avg_resultbus_power", "avg power usage of resultbus", "resultbus_power/sim_cycle",  NULL);
940

    
941
  stat_reg_formula(sdb, "avg_clock_power", "avg power usage of clock", "clock_power/sim_cycle",  NULL);
942

    
943
  stat_reg_formula(sdb, "fetch_stage_power", "total power usage of fetch stage", "icache_power + bpred_power", NULL);
944

    
945
  stat_reg_formula(sdb, "dispatch_stage_power", "total power usage of dispatch stage", "rename_power", NULL);
946

    
947
  stat_reg_formula(sdb, "issue_stage_power", "total power usage of issue stage", "resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power", NULL);
948

    
949
  stat_reg_formula(sdb, "avg_fetch_power", "average power of fetch unit per cycle", "(icache_power + bpred_power)/ sim_cycle", /* format */NULL);
950

    
951
  stat_reg_formula(sdb, "avg_dispatch_power", "average power of dispatch unit per cycle", "(rename_power)/ sim_cycle", /* format */NULL);
952

    
953
  stat_reg_formula(sdb, "avg_issue_power", "average power of issue unit per cycle", "(resultbus_power + alu_power + dcache_power + dcache2_power + window_power + lsq_power)/ sim_cycle", /* format */NULL);
954

    
955
  stat_reg_formula(sdb, "total_power", "total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power  + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)", NULL);
956

    
957
  stat_reg_formula(sdb, "avg_total_power_cycle", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_cycle", NULL);
958

    
959
  stat_reg_formula(sdb, "avg_total_power_cycle_nofp_nod2", "average total power per cycle","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_cycle", NULL);
960

    
961
  stat_reg_formula(sdb, "avg_total_power_insn", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power + dcache2_power)/sim_total_insn", NULL);
962

    
963
  stat_reg_formula(sdb, "avg_total_power_insn_nofp_nod2", "average total power per insn","(rename_power + bpred_power + window_power + lsq_power + regfile_power + icache_power + resultbus_power + clock_power + alu_power + dcache_power - falu_power )/sim_total_insn", NULL);
964

    
965
  stat_reg_double(sdb, "rename_power_cc1", "total power usage of rename unit_cc1", &rename_power_cc1, 0, NULL);
966

    
967
  stat_reg_double(sdb, "bpred_power_cc1", "total power usage of bpred unit_cc1", &bpred_power_cc1, 0, NULL);
968

    
969
  stat_reg_double(sdb, "window_power_cc1", "total power usage of instruction window_cc1", &window_power_cc1, 0, NULL);
970

    
971
  stat_reg_double(sdb, "lsq_power_cc1", "total power usage of lsq_cc1", &lsq_power_cc1, 0, NULL);
972

    
973
  stat_reg_double(sdb, "regfile_power_cc1", "total power usage of arch. regfile_cc1", &regfile_power_cc1, 0, NULL);
974

    
975
  stat_reg_double(sdb, "icache_power_cc1", "total power usage of icache_cc1", &icache_power_cc1, 0, NULL);
976

    
977
  stat_reg_double(sdb, "dcache_power_cc1", "total power usage of dcache_cc1", &dcache_power_cc1, 0, NULL);
978

    
979
  stat_reg_double(sdb, "dcache2_power_cc1", "total power usage of dcache2_cc1", &dcache2_power_cc1, 0, NULL);
980

    
981
  stat_reg_double(sdb, "alu_power_cc1", "total power usage of alu_cc1", &alu_power_cc1, 0, NULL);
982

    
983
  stat_reg_double(sdb, "resultbus_power_cc1", "total power usage of resultbus_cc1", &resultbus_power_cc1, 0, NULL);
984

    
985
  stat_reg_double(sdb, "clock_power_cc1", "total power usage of clock_cc1", &clock_power_cc1, 0, NULL);
986

    
987
  stat_reg_formula(sdb, "avg_rename_power_cc1", "avg power usage of rename unit_cc1", "rename_power_cc1/sim_cycle", NULL);
988

    
989
  stat_reg_formula(sdb, "avg_bpred_power_cc1", "avg power usage of bpred unit_cc1", "bpred_power_cc1/sim_cycle", NULL);
990

    
991
  stat_reg_formula(sdb, "avg_window_power_cc1", "avg power usage of instruction window_cc1", "window_power_cc1/sim_cycle",  NULL);
992

    
993
  stat_reg_formula(sdb, "avg_lsq_power_cc1", "avg power usage of lsq_cc1", "lsq_power_cc1/sim_cycle",  NULL);
994

    
995
  stat_reg_formula(sdb, "avg_regfile_power_cc1", "avg power usage of arch. regfile_cc1", "regfile_power_cc1/sim_cycle",  NULL);
996

    
997
  stat_reg_formula(sdb, "avg_icache_power_cc1", "avg power usage of icache_cc1", "icache_power_cc1/sim_cycle",  NULL);
998

    
999
  stat_reg_formula(sdb, "avg_dcache_power_cc1", "avg power usage of dcache_cc1", "dcache_power_cc1/sim_cycle",  NULL);
1000

    
1001
  stat_reg_formula(sdb, "avg_dcache2_power_cc1", "avg power usage of dcache2_cc1", "dcache2_power_cc1/sim_cycle",  NULL);
1002

    
1003
  stat_reg_formula(sdb, "avg_alu_power_cc1", "avg power usage of alu_cc1", "alu_power_cc1/sim_cycle",  NULL);
1004

    
1005
  stat_reg_formula(sdb, "avg_resultbus_power_cc1", "avg power usage of resultbus_cc1", "resultbus_power_cc1/sim_cycle",  NULL);
1006

    
1007
  stat_reg_formula(sdb, "avg_clock_power_cc1", "avg power usage of clock_cc1", "clock_power_cc1/sim_cycle",  NULL);
1008

    
1009
  stat_reg_formula(sdb, "fetch_stage_power_cc1", "total power usage of fetch stage_cc1", "icache_power_cc1 + bpred_power_cc1", NULL);
1010

    
1011
  stat_reg_formula(sdb, "dispatch_stage_power_cc1", "total power usage of dispatch stage_cc1", "rename_power_cc1", NULL);
1012

    
1013
  stat_reg_formula(sdb, "issue_stage_power_cc1", "total power usage of issue stage_cc1", "resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1", NULL);
1014

    
1015
  stat_reg_formula(sdb, "avg_fetch_power_cc1", "average power of fetch unit per cycle_cc1", "(icache_power_cc1 + bpred_power_cc1)/ sim_cycle", /* format */NULL);
1016

    
1017
  stat_reg_formula(sdb, "avg_dispatch_power_cc1", "average power of dispatch unit per cycle_cc1", "(rename_power_cc1)/ sim_cycle", /* format */NULL);
1018

    
1019
  stat_reg_formula(sdb, "avg_issue_power_cc1", "average power of issue unit per cycle_cc1", "(resultbus_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1 + lsq_power_cc1 + window_power_cc1)/ sim_cycle", /* format */NULL);
1020

    
1021
  stat_reg_formula(sdb, "total_power_cycle_cc1", "total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)", NULL);
1022

    
1023
  stat_reg_formula(sdb, "avg_total_power_cycle_cc1", "average total power per cycle_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 + alu_power_cc1 + dcache_power_cc1 +dcache2_power_cc1)/sim_cycle", NULL);
1024

    
1025
  stat_reg_formula(sdb, "avg_total_power_insn_cc1", "average total power per insn_cc1","(rename_power_cc1 + bpred_power_cc1 + lsq_power_cc1 + window_power_cc1 + regfile_power_cc1 + icache_power_cc1 + resultbus_power_cc1 + clock_power_cc1 +  alu_power_cc1 + dcache_power_cc1 + dcache2_power_cc1)/sim_total_insn", NULL);
1026

    
1027
  stat_reg_double(sdb, "rename_power_cc2", "total power usage of rename unit_cc2", &rename_power_cc2, 0, NULL);
1028

    
1029
  stat_reg_double(sdb, "bpred_power_cc2", "total power usage of bpred unit_cc2", &bpred_power_cc2, 0, NULL);
1030

    
1031
  stat_reg_double(sdb, "window_power_cc2", "total power usage of instruction window_cc2", &window_power_cc2, 0, NULL);
1032

    
1033
  stat_reg_double(sdb, "lsq_power_cc2", "total power usage of lsq_cc2", &lsq_power_cc2, 0, NULL);
1034

    
1035
  stat_reg_double(sdb, "regfile_power_cc2", "total power usage of arch. regfile_cc2", &regfile_power_cc2, 0, NULL);
1036

    
1037
  stat_reg_double(sdb, "icache_power_cc2", "total power usage of icache_cc2", &icache_power_cc2, 0, NULL);
1038

    
1039
  stat_reg_double(sdb, "dcache_power_cc2", "total power usage of dcache_cc2", &dcache_power_cc2, 0, NULL);
1040

    
1041
  stat_reg_double(sdb, "dcache2_power_cc2", "total power usage of dcache2_cc2", &dcache2_power_cc2, 0, NULL);
1042

    
1043
  stat_reg_double(sdb, "alu_power_cc2", "total power usage of alu_cc2", &alu_power_cc2, 0, NULL);
1044

    
1045
  stat_reg_double(sdb, "resultbus_power_cc2", "total power usage of resultbus_cc2", &resultbus_power_cc2, 0, NULL);
1046

    
1047
  stat_reg_double(sdb, "clock_power_cc2", "total power usage of clock_cc2", &clock_power_cc2, 0, NULL);
1048

    
1049
  stat_reg_formula(sdb, "avg_rename_power_cc2", "avg power usage of rename unit_cc2", "rename_power_cc2/sim_cycle", NULL);
1050

    
1051
  stat_reg_formula(sdb, "avg_bpred_power_cc2", "avg power usage of bpred unit_cc2", "bpred_power_cc2/sim_cycle", NULL);
1052

    
1053
  stat_reg_formula(sdb, "avg_window_power_cc2", "avg power usage of instruction window_cc2", "window_power_cc2/sim_cycle",  NULL);
1054

    
1055
  stat_reg_formula(sdb, "avg_lsq_power_cc2", "avg power usage of instruction lsq_cc2", "lsq_power_cc2/sim_cycle",  NULL);
1056

    
1057
  stat_reg_formula(sdb, "avg_regfile_power_cc2", "avg power usage of arch. regfile_cc2", "regfile_power_cc2/sim_cycle",  NULL);
1058

    
1059
  stat_reg_formula(sdb, "avg_icache_power_cc2", "avg power usage of icache_cc2", "icache_power_cc2/sim_cycle",  NULL);
1060

    
1061
  stat_reg_formula(sdb, "avg_dcache_power_cc2", "avg power usage of dcache_cc2", "dcache_power_cc2/sim_cycle",  NULL);
1062

    
1063
  stat_reg_formula(sdb, "avg_dcache2_power_cc2", "avg power usage of dcache2_cc2", "dcache2_power_cc2/sim_cycle",  NULL);
1064

    
1065
  stat_reg_formula(sdb, "avg_alu_power_cc2", "avg power usage of alu_cc2", "alu_power_cc2/sim_cycle",  NULL);
1066

    
1067
  stat_reg_formula(sdb, "avg_resultbus_power_cc2", "avg power usage of resultbus_cc2", "resultbus_power_cc2/sim_cycle",  NULL);
1068

    
1069
  stat_reg_formula(sdb, "avg_clock_power_cc2", "avg power usage of clock_cc2", "clock_power_cc2/sim_cycle",  NULL);
1070

    
1071
  stat_reg_formula(sdb, "fetch_stage_power_cc2", "total power usage of fetch stage_cc2", "icache_power_cc2 + bpred_power_cc2", NULL);
1072

    
1073
  stat_reg_formula(sdb, "dispatch_stage_power_cc2", "total power usage of dispatch stage_cc2", "rename_power_cc2", NULL);
1074

    
1075
  stat_reg_formula(sdb, "issue_stage_power_cc2", "total power usage of issue stage_cc2", "resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2", NULL);
1076

    
1077
  stat_reg_formula(sdb, "avg_fetch_power_cc2", "average power of fetch unit per cycle_cc2", "(icache_power_cc2 + bpred_power_cc2)/ sim_cycle", /* format */NULL);
1078

    
1079
  stat_reg_formula(sdb, "avg_dispatch_power_cc2", "average power of dispatch unit per cycle_cc2", "(rename_power_cc2)/ sim_cycle", /* format */NULL);
1080

    
1081
  stat_reg_formula(sdb, "avg_issue_power_cc2", "average power of issue unit per cycle_cc2", "(resultbus_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2 + lsq_power_cc2 + window_power_cc2)/ sim_cycle", /* format */NULL);
1082

    
1083
  stat_reg_formula(sdb, "total_power_cycle_cc2", "total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)", NULL);
1084

    
1085
  stat_reg_formula(sdb, "avg_total_power_cycle_cc2", "average total power per cycle_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_cycle", NULL);
1086

    
1087
  stat_reg_formula(sdb, "avg_total_power_insn_cc2", "average total power per insn_cc2","(rename_power_cc2 + bpred_power_cc2 + lsq_power_cc2 + window_power_cc2 + regfile_power_cc2 + icache_power_cc2 + resultbus_power_cc2 + clock_power_cc2 + alu_power_cc2 + dcache_power_cc2 + dcache2_power_cc2)/sim_total_insn", NULL);
1088

    
1089
  stat_reg_double(sdb, "rename_power_cc3", "total power usage of rename unit_cc3", &rename_power_cc3, 0, NULL);
1090

    
1091
  stat_reg_double(sdb, "bpred_power_cc3", "total power usage of bpred unit_cc3", &bpred_power_cc3, 0, NULL);
1092

    
1093
  stat_reg_double(sdb, "window_power_cc3", "total power usage of instruction window_cc3", &window_power_cc3, 0, NULL);
1094

    
1095
  stat_reg_double(sdb, "lsq_power_cc3", "total power usage of lsq_cc3", &lsq_power_cc3, 0, NULL);
1096

    
1097
  stat_reg_double(sdb, "regfile_power_cc3", "total power usage of arch. regfile_cc3", &regfile_power_cc3, 0, NULL);
1098

    
1099
  stat_reg_double(sdb, "icache_power_cc3", "total power usage of icache_cc3", &icache_power_cc3, 0, NULL);
1100

    
1101
  stat_reg_double(sdb, "dcache_power_cc3", "total power usage of dcache_cc3", &dcache_power_cc3, 0, NULL);
1102

    
1103
  stat_reg_double(sdb, "dcache2_power_cc3", "total power usage of dcache2_cc3", &dcache2_power_cc3, 0, NULL);
1104

    
1105
  stat_reg_double(sdb, "alu_power_cc3", "total power usage of alu_cc3", &alu_power_cc3, 0, NULL);
1106

    
1107
  stat_reg_double(sdb, "resultbus_power_cc3", "total power usage of resultbus_cc3", &resultbus_power_cc3, 0, NULL);
1108

    
1109
  stat_reg_double(sdb, "clock_power_cc3", "total power usage of clock_cc3", &clock_power_cc3, 0, NULL);
1110

    
1111
  stat_reg_formula(sdb, "avg_rename_power_cc3", "avg power usage of rename unit_cc3", "rename_power_cc3/sim_cycle", NULL);
1112

    
1113
  stat_reg_formula(sdb, "avg_bpred_power_cc3", "avg power usage of bpred unit_cc3", "bpred_power_cc3/sim_cycle", NULL);
1114

    
1115
  stat_reg_formula(sdb, "avg_window_power_cc3", "avg power usage of instruction window_cc3", "window_power_cc3/sim_cycle",  NULL);
1116

    
1117
  stat_reg_formula(sdb, "avg_lsq_power_cc3", "avg power usage of instruction lsq_cc3", "lsq_power_cc3/sim_cycle",  NULL);
1118

    
1119
  stat_reg_formula(sdb, "avg_regfile_power_cc3", "avg power usage of arch. regfile_cc3", "regfile_power_cc3/sim_cycle",  NULL);
1120

    
1121
  stat_reg_formula(sdb, "avg_icache_power_cc3", "avg power usage of icache_cc3", "icache_power_cc3/sim_cycle",  NULL);
1122

    
1123
  stat_reg_formula(sdb, "avg_dcache_power_cc3", "avg power usage of dcache_cc3", "dcache_power_cc3/sim_cycle",  NULL);
1124

    
1125
  stat_reg_formula(sdb, "avg_dcache2_power_cc3", "avg power usage of dcache2_cc3", "dcache2_power_cc3/sim_cycle",  NULL);
1126

    
1127
  stat_reg_formula(sdb, "avg_alu_power_cc3", "avg power usage of alu_cc3", "alu_power_cc3/sim_cycle",  NULL);
1128

    
1129
  stat_reg_formula(sdb, "avg_resultbus_power_cc3", "avg power usage of resultbus_cc3", "resultbus_power_cc3/sim_cycle",  NULL);
1130

    
1131
  stat_reg_formula(sdb, "avg_clock_power_cc3", "avg power usage of clock_cc3", "clock_power_cc3/sim_cycle",  NULL);
1132

    
1133
  stat_reg_formula(sdb, "fetch_stage_power_cc3", "total power usage of fetch stage_cc3", "icache_power_cc3 + bpred_power_cc3", NULL);
1134

    
1135
  stat_reg_formula(sdb, "dispatch_stage_power_cc3", "total power usage of dispatch stage_cc3", "rename_power_cc3", NULL);
1136

    
1137
  stat_reg_formula(sdb, "issue_stage_power_cc3", "total power usage of issue stage_cc3", "resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3", NULL);
1138

    
1139
  stat_reg_formula(sdb, "avg_fetch_power_cc3", "average power of fetch unit per cycle_cc3", "(icache_power_cc3 + bpred_power_cc3)/ sim_cycle", /* format */NULL);
1140

    
1141
  stat_reg_formula(sdb, "avg_dispatch_power_cc3", "average power of dispatch unit per cycle_cc3", "(rename_power_cc3)/ sim_cycle", /* format */NULL);
1142

    
1143
  stat_reg_formula(sdb, "avg_issue_power_cc3", "average power of issue unit per cycle_cc3", "(resultbus_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3 + lsq_power_cc3 + window_power_cc3)/ sim_cycle", /* format */NULL);
1144

    
1145
  stat_reg_formula(sdb, "total_power_cycle_cc3", "total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)", NULL);
1146

    
1147
  stat_reg_formula(sdb, "avg_total_power_cycle_cc3", "average total power per cycle_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_cycle", NULL);
1148

    
1149
  stat_reg_formula(sdb, "avg_total_power_insn_cc3", "average total power per insn_cc3","(rename_power_cc3 + bpred_power_cc3 + lsq_power_cc3 + window_power_cc3 + regfile_power_cc3 + icache_power_cc3 + resultbus_power_cc3 + clock_power_cc3 + alu_power_cc3 + dcache_power_cc3 + dcache2_power_cc3)/sim_total_insn", NULL);
1150

    
1151
  stat_reg_counter(sdb, "total_rename_access", "total number accesses of rename unit", &total_rename_access, 0, NULL);
1152

    
1153
  stat_reg_counter(sdb, "total_bpred_access", "total number accesses of bpred unit", &total_bpred_access, 0, NULL);
1154

    
1155
  stat_reg_counter(sdb, "total_window_access", "total number accesses of instruction window", &total_window_access, 0, NULL);
1156

    
1157
  stat_reg_counter(sdb, "total_lsq_access", "total number accesses of load/store queue", &total_lsq_access, 0, NULL);
1158

    
1159
  stat_reg_counter(sdb, "total_regfile_access", "total number accesses of arch. regfile", &total_regfile_access, 0, NULL);
1160

    
1161
  stat_reg_counter(sdb, "total_icache_access", "total number accesses of icache", &total_icache_access, 0, NULL);
1162

    
1163
  stat_reg_counter(sdb, "total_dcache_access", "total number accesses of dcache", &total_dcache_access, 0, NULL);
1164

    
1165
  stat_reg_counter(sdb, "total_dcache2_access", "total number accesses of dcache2", &total_dcache2_access, 0, NULL);
1166

    
1167
  stat_reg_counter(sdb, "total_alu_access", "total number accesses of alu", &total_alu_access, 0, NULL);
1168

    
1169
  stat_reg_counter(sdb, "total_resultbus_access", "total number accesses of resultbus", &total_resultbus_access, 0, NULL);
1170

    
1171
  stat_reg_formula(sdb, "avg_rename_access", "avg number accesses of rename unit", "total_rename_access/sim_cycle", NULL);
1172

    
1173
  stat_reg_formula(sdb, "avg_bpred_access", "avg number accesses of bpred unit", "total_bpred_access/sim_cycle", NULL);
1174

    
1175
  stat_reg_formula(sdb, "avg_window_access", "avg number accesses of instruction window", "total_window_access/sim_cycle",  NULL);
1176

    
1177
  stat_reg_formula(sdb, "avg_lsq_access", "avg number accesses of lsq", "total_lsq_access/sim_cycle",  NULL);
1178

    
1179
  stat_reg_formula(sdb, "avg_regfile_access", "avg number accesses of arch. regfile", "total_regfile_access/sim_cycle",  NULL);
1180

    
1181
  stat_reg_formula(sdb, "avg_icache_access", "avg number accesses of icache", "total_icache_access/sim_cycle",  NULL);
1182

    
1183
  stat_reg_formula(sdb, "avg_dcache_access", "avg number accesses of dcache", "total_dcache_access/sim_cycle",  NULL);
1184

    
1185
  stat_reg_formula(sdb, "avg_dcache2_access", "avg number accesses of dcache2", "total_dcache2_access/sim_cycle",  NULL);
1186

    
1187
  stat_reg_formula(sdb, "avg_alu_access", "avg number accesses of alu", "total_alu_access/sim_cycle",  NULL);
1188

    
1189
  stat_reg_formula(sdb, "avg_resultbus_access", "avg number accesses of resultbus", "total_resultbus_access/sim_cycle",  NULL);
1190

    
1191
  stat_reg_counter(sdb, "max_rename_access", "max number accesses of rename unit", &max_rename_access, 0, NULL);
1192

    
1193
  stat_reg_counter(sdb, "max_bpred_access", "max number accesses of bpred unit", &max_bpred_access, 0, NULL);
1194

    
1195
  stat_reg_counter(sdb, "max_window_access", "max number accesses of instruction window", &max_window_access, 0, NULL);
1196

    
1197
  stat_reg_counter(sdb, "max_lsq_access", "max number accesses of load/store queue", &max_lsq_access, 0, NULL);
1198

    
1199
  stat_reg_counter(sdb, "max_regfile_access", "max number accesses of arch. regfile", &max_regfile_access, 0, NULL);
1200

    
1201
  stat_reg_counter(sdb, "max_icache_access", "max number accesses of icache", &max_icache_access, 0, NULL);
1202

    
1203
  stat_reg_counter(sdb, "max_dcache_access", "max number accesses of dcache", &max_dcache_access, 0, NULL);
1204

    
1205
  stat_reg_counter(sdb, "max_dcache2_access", "max number accesses of dcache2", &max_dcache2_access, 0, NULL);
1206

    
1207
  stat_reg_counter(sdb, "max_alu_access", "max number accesses of alu", &max_alu_access, 0, NULL);
1208

    
1209
  stat_reg_counter(sdb, "max_resultbus_access", "max number accesses of resultbus", &max_resultbus_access, 0, NULL);
1210

    
1211
  stat_reg_double(sdb, "max_cycle_power_cc1", "maximum cycle power usage of cc1", &max_cycle_power_cc1, 0, NULL);
1212

    
1213
  stat_reg_double(sdb, "max_cycle_power_cc2", "maximum cycle power usage of cc2", &max_cycle_power_cc2, 0, NULL);
1214

    
1215
  stat_reg_double(sdb, "max_cycle_power_cc3", "maximum cycle power usage of cc3", &max_cycle_power_cc3, 0, NULL);
1216

    
1217
  stat_reg_double(sdb, "parasitic_power_cc1", "parasitic power cc1", &total_parasitic_cc1, 0, NULL);
1218
  stat_reg_double(sdb, "parasitic_power_cc2", "parasitic power cc2", &total_parasitic_cc2, 0, NULL);
1219
  stat_reg_double(sdb, "parasitic_power_cc3", "parasitic power cc3", &total_parasitic_cc3, 0, NULL);
1220
  stat_reg_double(sdb, "min amperage", "min amperage", &min_amp, 0, NULL);
1221
  stat_reg_double(sdb, "max amperage", "max amperage", &max_amp, 0, NULL);
1222
  stat_reg_double(sdb, "slow_cycles", "slow cycles", &slow_cycles, 0, NULL);
1223
  stat_reg_double(sdb, "fast_cycles", "fast cycles", &fast_cycles, 0, NULL);
1224
}
1225

    
1226

    
1227
/* Given the row and column counts of an array structure, pick a
   power-of-two folding factor that brings the array closer to a square
   layout: the rows should be divided by the returned factor and the
   columns multiplied by it.

   Only arrays with more rows than columns are folded; when rows <= cols
   the factor is 1 (no change).  Folding stops as soon as one further
   halving of the rows would bring them to or below the column count
   reached so far.

   The factor is built with exact integer doubling.  The earlier
   (int)pow(2.0, n) form truncated a floating-point result and so relied
   on the math library returning the power of two exactly.  A negative
   column count, for which the folding loop has no stopping point, now
   yields 1 instead of looping toward integer overflow.
*/
int squarify(int rows, int cols)
{
  int factor = 2;   /* 2^k after the k-th fold */

  /* Square, wider than tall, or nonsensical input: leave untouched. */
  if (rows <= cols || cols < 0)
    return 1;

  for (;;) {
    rows /= 2;
    cols *= 2;

    /* Stop one step early: another fold would overshoot. */
    if (rows / 2 <= cols)
      return factor;

    factor *= 2;
  }
}
1263

    
1264
/* could improve squarify to work when rows < cols */
1265

    
1266
/* Variant of squarify() that also handles arrays with fewer rows than
   columns.  Returns a power-of-two scale as a double: values above 1 come
   from repeatedly halving rows / doubling cols, values below 1 from the
   opposite direction.  The value returned is the scale accumulated
   *before* the fold that first makes the two dimensions meet or cross,
   so a single fold yields 1.0. */
double squarify_new(int rows, int cols)
{
  /* Running value of 2^k; doubling and halving are exact in binary
     floating point. */
  double factor = 1.0;

  if (rows == cols)
    return factor;

  /* Taller than wide: fold rows into columns. */
  while (rows > cols) {
    rows /= 2;
    cols *= 2;
    if (rows <= cols)
      return factor;
    factor *= 2.0;
  }

  /* Wider than tall: fold columns into rows. */
  while (cols > rows) {
    rows *= 2;
    cols /= 2;
    if (cols <= rows)
      return factor;
    factor *= 0.5;
  }

  return 1.0;
}
1292

    
1293
void dump_power_stats(power)
1294
     power_result_type *power;
1295
{
1296
  double total_power;
1297
  double bpred_power;
1298
  double rename_power;
1299
  double rat_power;
1300
  double dcl_power;
1301
  double lsq_power;
1302
  double window_power;
1303
  double wakeup_power;
1304
  double rs_power;
1305
  double lsq_wakeup_power;
1306
  double lsq_rs_power;
1307
  double regfile_power;
1308
  double reorder_power;
1309
  double icache_power;
1310
  double dcache_power;
1311
  double dcache2_power;
1312
  double dtlb_power;
1313
  double itlb_power;
1314
  double ambient_power = 2.0;
1315

    
1316
  icache_power = power->icache_power;
1317

    
1318
  dcache_power = power->dcache_power;
1319

    
1320
  dcache2_power = power->dcache2_power;
1321

    
1322
  itlb_power = power->itlb;
1323
  dtlb_power = power->dtlb;
1324

    
1325
  bpred_power = power->btb + power->local_predict + power->global_predict + 
1326
    power->chooser + power->ras;
1327

    
1328
  rat_power = power->rat_decoder + 
1329
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
1330

    
1331
  dcl_power = power->dcl_compare + power->dcl_pencode;
1332

    
1333
  rename_power = power->rat_power + power->dcl_power + power->inst_decoder_power;
1334

    
1335
  wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
1336
    power->wakeup_ormatch;
1337
   
1338
  rs_power = power->rs_decoder + 
1339
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
1340

    
1341
  window_power = wakeup_power + rs_power + power->selection;
1342

    
1343
  lsq_rs_power = power->lsq_rs_decoder + 
1344
    power->lsq_rs_wordline + power->lsq_rs_bitline + power->lsq_rs_senseamp;
1345

    
1346
  lsq_wakeup_power = power->lsq_wakeup_tagdrive + 
1347
    power->lsq_wakeup_tagmatch + power->lsq_wakeup_ormatch;
1348

    
1349
  lsq_power = lsq_wakeup_power + lsq_rs_power;
1350

    
1351
  reorder_power = power->reorder_decoder + 
1352
    power->reorder_wordline + power->reorder_bitline + 
1353
    power->reorder_senseamp;
1354

    
1355
  regfile_power = power->regfile_decoder + 
1356
    power->regfile_wordline + power->regfile_bitline + 
1357
    power->regfile_senseamp;
1358

    
1359
  total_power = bpred_power + rename_power + window_power + regfile_power +
1360
    power->resultbus + lsq_power + 
1361
    icache_power + dcache_power + dcache2_power + 
1362
    dtlb_power + itlb_power + power->clock_power + power->ialu_power +
1363
    power->falu_power;
1364

    
1365
  fprintf(stderr,"\nProcessor Parameters:\n");
1366
  fprintf(stderr,"Issue Width: %d\n",ruu_issue_width);
1367
  fprintf(stderr,"Window Size: %d\n",RUU_size);
1368
  fprintf(stderr,"Number of Virtual Registers: %d\n",MD_NUM_IREGS);
1369
  fprintf(stderr,"Number of Physical Registers: %d\n",RUU_size);
1370
  fprintf(stderr,"Datapath Width: %d\n",data_width);
1371

    
1372
  fprintf(stderr,"Total Power Consumption: %g\n",total_power+ambient_power);
1373
  fprintf(stderr,"Branch Predictor Power Consumption: %g  (%.3g%%)\n",bpred_power,100*bpred_power/total_power);
1374
  fprintf(stderr," branch target buffer power (W): %g\n",power->btb);
1375
  fprintf(stderr," local predict power (W): %g\n",power->local_predict);
1376
  fprintf(stderr," global predict power (W): %g\n",power->global_predict);
1377
  fprintf(stderr," chooser power (W): %g\n",power->chooser);
1378
  fprintf(stderr," RAS power (W): %g\n",power->ras);
1379
  fprintf(stderr,"Rename Logic Power Consumption: %g  (%.3g%%)\n",rename_power,100*rename_power/total_power);
1380
  fprintf(stderr," Instruction Decode Power (W): %g\n",power->inst_decoder_power);
1381
  fprintf(stderr," RAT decode_power (W): %g\n",power->rat_decoder);
1382
  fprintf(stderr," RAT wordline_power (W): %g\n",power->rat_wordline);
1383
  fprintf(stderr," RAT bitline_power (W): %g\n",power->rat_bitline);
1384
  fprintf(stderr," DCL Comparators (W): %g\n",power->dcl_compare);
1385
  fprintf(stderr,"Instruction Window Power Consumption: %g  (%.3g%%)\n",window_power,100*window_power/total_power);
1386
  fprintf(stderr," tagdrive (W): %g\n",power->wakeup_tagdrive);
1387
  fprintf(stderr," tagmatch (W): %g\n",power->wakeup_tagmatch);
1388
  fprintf(stderr," Selection Logic (W): %g\n",power->selection);
1389
  fprintf(stderr," decode_power (W): %g\n",power->rs_decoder);
1390
  fprintf(stderr," wordline_power (W): %g\n",power->rs_wordline);
1391
  fprintf(stderr," bitline_power (W): %g\n",power->rs_bitline);
1392
  fprintf(stderr,"Load/Store Queue Power Consumption: %g  (%.3g%%)\n",lsq_power,100*lsq_power/total_power);
1393
  fprintf(stderr," tagdrive (W): %g\n",power->lsq_wakeup_tagdrive);
1394
  fprintf(stderr," tagmatch (W): %g\n",power->lsq_wakeup_tagmatch);
1395
  fprintf(stderr," decode_power (W): %g\n",power->lsq_rs_decoder);
1396
  fprintf(stderr," wordline_power (W): %g\n",power->lsq_rs_wordline);
1397
  fprintf(stderr," bitline_power (W): %g\n",power->lsq_rs_bitline);
1398
  fprintf(stderr,"Arch. Register File Power Consumption: %g  (%.3g%%)\n",regfile_power,100*regfile_power/total_power);
1399
  fprintf(stderr," decode_power (W): %g\n",power->regfile_decoder);
1400
  fprintf(stderr," wordline_power (W): %g\n",power->regfile_wordline);
1401
  fprintf(stderr," bitline_power (W): %g\n",power->regfile_bitline);
1402
  fprintf(stderr,"Result Bus Power Consumption: %g  (%.3g%%)\n",power->resultbus,100*power->resultbus/total_power);
1403
  fprintf(stderr,"Total Clock Power: %g  (%.3g%%)\n",power->clock_power,100*power->clock_power/total_power);
1404
  fprintf(stderr,"Int ALU Power: %g  (%.3g%%)\n",power->ialu_power,100*power->ialu_power/total_power);
1405
  fprintf(stderr,"FP ALU Power: %g  (%.3g%%)\n",power->falu_power,100*power->falu_power/total_power);
1406
  fprintf(stderr,"Instruction Cache Power Consumption: %g  (%.3g%%)\n",icache_power,100*icache_power/total_power);
1407
  fprintf(stderr," decode_power (W): %g\n",power->icache_decoder);
1408
  fprintf(stderr," wordline_power (W): %g\n",power->icache_wordline);
1409
  fprintf(stderr," bitline_power (W): %g\n",power->icache_bitline);
1410
  fprintf(stderr," senseamp_power (W): %g\n",power->icache_senseamp);
1411
  fprintf(stderr," tagarray_power (W): %g\n",power->icache_tagarray);
1412
  fprintf(stderr,"Itlb_power (W): %g (%.3g%%)\n",power->itlb,100*power->itlb/total_power);
1413
  fprintf(stderr,"Data Cache Power Consumption: %g  (%.3g%%)\n",dcache_power,100*dcache_power/total_power);
1414
  fprintf(stderr," decode_power (W): %g\n",power->dcache_decoder);
1415
  fprintf(stderr," wordline_power (W): %g\n",power->dcache_wordline);
1416
  fprintf(stderr," bitline_power (W): %g\n",power->dcache_bitline);
1417
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache_senseamp);
1418
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache_tagarray);
1419
  fprintf(stderr,"Dtlb_power (W): %g (%.3g%%)\n",power->dtlb,100*power->dtlb/total_power);
1420
  fprintf(stderr,"Level 2 Cache Power Consumption: %g (%.3g%%)\n",dcache2_power,100*dcache2_power/total_power);
1421
  fprintf(stderr," decode_power (W): %g\n",power->dcache2_decoder);
1422
  fprintf(stderr," wordline_power (W): %g\n",power->dcache2_wordline);
1423
  fprintf(stderr," bitline_power (W): %g\n",power->dcache2_bitline);
1424
  fprintf(stderr," senseamp_power (W): %g\n",power->dcache2_senseamp);
1425
  fprintf(stderr," tagarray_power (W): %g\n",power->dcache2_tagarray);
1426
}
1427

    
1428
/*======================================================================*/
1429

    
1430

    
1431

    
1432
/* 
1433
 * This part of the code contains routines for each section as
1434
 * described in the tech report.  See the tech report for more details
1435
 * and explanations */
1436

    
1437
/*----------------------------------------------------------------------*/
1438

    
1439
double driver_size(double driving_cap, double desiredrisetime) {
1440
  double nsize, psize;
1441
  double Rpdrive; 
1442

    
1443
  Rpdrive = desiredrisetime/(driving_cap*log(VSINV)*-1.0);
1444
  psize = restowidth(Rpdrive,PCH);
1445
  nsize = restowidth(Rpdrive,NCH);
1446
  if (psize > Wworddrivemax) {
1447
    psize = Wworddrivemax;
1448
  }
1449
  if (psize < 4.0 * LSCALE)
1450
    psize = 4.0 * LSCALE;
1451

    
1452
  return (psize);
1453

    
1454
}
1455

    
1456
/* Decoder delay:  (see section 6.1 of tech report) */
1457

    
1458
double array_decoder_power(rows,cols,predeclength,rports,wports,cache)
1459
     int rows,cols;
1460
     double predeclength;
1461
     int rports,wports;
1462
     int cache;
1463
{
1464
  double Ctotal=0;
1465
  double Ceq=0;
1466
  int numstack;
1467
  int decode_bits=0;
1468
  int ports;
1469
  double rowsb;
1470

    
1471
  /* read and write ports are the same here */
1472
  ports = rports + wports;
1473

    
1474
  rowsb = (double)rows;
1475

    
1476
  /* number of input bits to be decoded */
1477
  decode_bits=ceil((logtwo(rowsb)));
1478

    
1479
  /* First stage: driving the decoders */
1480

    
1481
  /* This is the capacitance for driving one bit (and its complement).
1482
     -There are #rowsb 3->8 decoders contributing gatecap.
1483
     - 2.0 factor from 2 identical sets of drivers in parallel
1484
  */
1485
  Ceq = 2.0*(draincap(Wdecdrivep,PCH,1)+draincap(Wdecdriven,NCH,1)) +
1486
    gatecap(Wdec3to8n+Wdec3to8p,10.0)*rowsb;
1487

    
1488
  /* There are ports * #decode_bits total */
1489
  Ctotal+=ports*decode_bits*Ceq;
1490

    
1491
  if(verbose)
1492
    fprintf(stderr,"Decoder -- Driving decoders            == %g\n",.3*Ctotal*Powerfactor);
1493

    
1494
  /* second stage: driving a bunch of nor gates with a nand 
1495
     numstack is the size of the nor gates -- ie. a 7-128 decoder has
1496
     3-input NAND followed by 3-input NOR  */
1497

    
1498
  numstack = ceil((1.0/3.0)*logtwo(rows));
1499

    
1500
  if (numstack<=0) numstack = 1;
1501
  if (numstack>5) numstack = 5;
1502

    
1503
  /* There are #rowsb NOR gates being driven*/
1504
  Ceq = (3.0*draincap(Wdec3to8p,PCH,1) +draincap(Wdec3to8n,NCH,3) +
1505
         gatecap(WdecNORn+WdecNORp,((numstack*40)+20.0)))*rowsb;
1506

    
1507
  Ctotal+=ports*Ceq;
1508

    
1509
  if(verbose)
1510
    fprintf(stderr,"Decoder -- Driving nor w/ nand         == %g\n",.3*ports*Ceq*Powerfactor);
1511

    
1512
  /* Final stage: driving an inverter with the nor 
1513
     (inverter preceding wordline driver) -- wordline driver is in the next section*/
1514

    
1515
  Ceq = (gatecap(Wdecinvn+Wdecinvp,20.0)+
1516
         numstack*draincap(WdecNORn,NCH,1)+
1517
         draincap(WdecNORp,PCH,numstack));
1518

    
1519
  if(verbose)
1520
    fprintf(stderr,"Decoder -- Driving inverter w/ nor     == %g\n",.3*ports*Ceq*Powerfactor);
1521

    
1522
  Ctotal+=ports*Ceq;
1523

    
1524
  /* assume Activity Factor == .3  */
1525

    
1526
  return(.3*Ctotal*Powerfactor);
1527
}
1528

    
1529
/*
 * Convenience wrapper around array_decoder_power() for callers that have
 * no predecode length to supply: it forwards every argument unchanged and
 * passes 0.0 for predeclength.
 */
double simple_array_decoder_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  /* kept as a double object so the callee receives a double argument */
  double zero_length = 0.0;

  return array_decoder_power(rows,cols,zero_length,rports,wports,cache);
}
1537

    
1538

    
1539
double array_wordline_power(rows,cols,wordlinelength,rports,wports,cache)
1540
     int rows,cols;
1541
     double wordlinelength;
1542
     int rports,wports;
1543
     int cache;
1544
{
1545
  double Ctotal=0;
1546
  double Ceq=0;
1547
  double Cline=0;
1548
  double Cliner, Clinew=0;
1549
  double desiredrisetime,psize,nsize;
1550
  int ports;
1551
  double colsb;
1552

    
1553
  ports = rports+wports;
1554

    
1555
  colsb = (double)cols;
1556

    
1557
  /* Calculate size of wordline drivers assuming rise time == Period / 8 
1558
     - estimate cap on line 
1559
     - compute min resistance to achieve this with RC 
1560
     - compute width needed to achieve this resistance */
1561

    
1562
  desiredrisetime = Period/16;
1563
  Cline = (gatecappass(Wmemcellr,1.0))*colsb + wordlinelength*CM3metal;
1564
  psize = driver_size(Cline,desiredrisetime);
1565
  
1566
  /* how do we want to do p-n ratioing? -- here we just assume the same ratio 
1567
     from an inverter pair  */
1568
  nsize = psize * Wdecinvn/Wdecinvp; 
1569
  
1570
  if(verbose)
1571
    fprintf(stderr,"Wordline Driver Sizes -- nsize == %f, psize == %f\n",nsize,psize);
1572

    
1573
  Ceq = draincap(Wdecinvn,NCH,1) + draincap(Wdecinvp,PCH,1) +
1574
    gatecap(nsize+psize,20.0);
1575

    
1576
  Ctotal+=ports*Ceq;
1577

    
1578
  if(verbose)
1579
    fprintf(stderr,"Wordline -- Inverter -> Driver         == %g\n",ports*Ceq*Powerfactor);
1580

    
1581
  /* Compute caps of read wordline and write wordlines 
1582
     - wordline driver caps, given computed width from above
1583
     - read wordlines have 1 nmos access tx, size ~4
1584
     - write wordlines have 2 nmos access tx, size ~2
1585
     - metal line cap
1586
  */
1587

    
1588
  Cliner = (gatecappass(Wmemcellr,(BitWidth-2*Wmemcellr)/2.0))*colsb+
1589
    wordlinelength*CM3metal+
1590
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1591
  Clinew = (2.0*gatecappass(Wmemcellw,(BitWidth-2*Wmemcellw)/2.0))*colsb+
1592
    wordlinelength*CM3metal+
1593
    2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1594

    
1595
  if(verbose) {
1596
    fprintf(stderr,"Wordline -- Line                       == %g\n",1e12*Cline);
1597
    fprintf(stderr,"Wordline -- Line -- access -- gatecap  == %g\n",1e12*colsb*2*gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0));
1598
    fprintf(stderr,"Wordline -- Line -- driver -- draincap == %g\n",1e12*draincap(nsize,NCH,1) + draincap(psize,PCH,1));
1599
    fprintf(stderr,"Wordline -- Line -- metal              == %g\n",1e12*wordlinelength*CM3metal);
1600
  }
1601
  Ctotal+=rports*Cliner+wports*Clinew;
1602

    
1603
  /* AF == 1 assuming a different wordline is charged each cycle, but only
1604
     1 wordline (per port) is actually used */
1605

    
1606
  return(Ctotal*Powerfactor);
1607
}
1608

    
1609
double simple_array_wordline_power(rows,cols,rports,wports,cache)
1610
     int rows,cols;
1611
     int rports,wports;
1612
     int cache;
1613
{
1614
  double wordlinelength;
1615
  int ports = rports + wports;
1616
  wordlinelength = cols *  (RegCellWidth + 2 * ports * BitlineSpacing);
1617
  return(array_wordline_power(rows,cols,wordlinelength,rports,wports,cache));
1618
}
1619

    
1620

    
1621
double array_bitline_power(rows,cols,bitlinelength,rports,wports,cache)
1622
     int rows,cols;
1623
     double bitlinelength;
1624
     int rports,wports;
1625
     int cache;
1626
{
1627
  double Ctotal=0;
1628
  double Ccolmux=0;
1629
  double Cbitrowr=0;
1630
  double Cbitroww=0;
1631
  double Cprerow=0;
1632
  double Cwritebitdrive=0;
1633
  double Cpregate=0;
1634
  double Cliner=0;
1635
  double Clinew=0;
1636
  int ports;
1637
  double rowsb;
1638
  double colsb;
1639

    
1640
  double desiredrisetime, Cline, psize, nsize;
1641

    
1642
  ports = rports + wports;
1643

    
1644
  rowsb = (double)rows;
1645
  colsb = (double)cols;
1646

    
1647
  /* Draincaps of access tx's */
1648

    
1649
  Cbitrowr = draincap(Wmemcellr,NCH,1);
1650
  Cbitroww = draincap(Wmemcellw,NCH,1);
1651

    
1652
  /* Cprerow -- precharge cap on the bitline
1653
     -simple scheme to estimate size of pre-charge tx's in a similar fashion
1654
      to wordline driver size estimation.
1655
     -FIXME: it would be better to use precharge/keeper pairs, i've omitted this
1656
      from this version because it couldn't autosize as easily.
1657
  */
1658

    
1659
  desiredrisetime = Period/8;
1660

    
1661
  Cline = rowsb*Cbitrowr+CM2metal*bitlinelength;
1662
  psize = driver_size(Cline,desiredrisetime);
1663

    
1664
  /* compensate for not having an nmos pre-charging */
1665
  psize = psize + psize * Wdecinvn/Wdecinvp; 
1666

    
1667
  if(verbose)
1668
    printf("Cprerow auto   == %g (psize == %g)\n",draincap(psize,PCH,1),psize);
1669

    
1670
  Cprerow = draincap(psize,PCH,1);
1671

    
1672
  /* Cpregate -- cap due to gatecap of precharge transistors -- tack this
1673
     onto bitline cap, again this could have a keeper */
1674
  Cpregate = 4.0*gatecap(psize,10.0);
1675
  global_clockcap+=rports*cols*2.0*Cpregate;
1676

    
1677
  /* Cwritebitdrive -- write bitline drivers are used instead of the precharge
1678
     stuff for write bitlines
1679
     - 2 inverter drivers within each driver pair */
1680

    
1681
  Cline = rowsb*Cbitroww+CM2metal*bitlinelength;
1682

    
1683
  psize = driver_size(Cline,desiredrisetime);
1684
  nsize = psize * Wdecinvn/Wdecinvp; 
1685

    
1686
  Cwritebitdrive = 2.0*(draincap(psize,PCH,1)+draincap(nsize,NCH,1));
1687

    
1688
  /* 
1689
     reg files (cache==0) 
1690
     => single ended bitlines (1 bitline/col)
1691
     => AFs from pop_count
1692
     caches (cache ==1)
1693
     => double-ended bitlines (2 bitlines/col)
1694
     => AFs = .5 (since one of the two bitlines is always charging/discharging)
1695
  */
1696

    
1697
#ifdef STATIC_AF
1698
  if (cache == 0) {
1699
    /* compute the total line cap for read/write bitlines */
1700
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1701
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1702

    
1703
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1704
       in cache styles) */
1705
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1706
    Ctotal+=(1.0-POPCOUNT_AF)*rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1707
    Ctotal+=.3*wports*cols*(Clinew+Cwritebitdrive);
1708
  } 
1709
  else { 
1710
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1711
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1712
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1713
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1714
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1715
  }
1716
#else
1717
  if (cache == 0) {
1718
    /* compute the total line cap for read/write bitlines */
1719
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
1720
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1721

    
1722
    /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
1723
       in cache styles) */
1724
    Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
1725
    Ctotal += rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
1726
    Ctotal += .3*wports*cols*(Clinew+Cwritebitdrive);
1727
  } 
1728
  else { 
1729
    Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
1730
    Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
1731
    Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
1732
    Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
1733
    Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
1734
  }
1735
#endif
1736

    
1737
  if(verbose) {
1738
    fprintf(stderr,"Bitline -- Precharge                   == %g\n",1e12*Cpregate);
1739
    fprintf(stderr,"Bitline -- Line                        == %g\n",1e12*(Cliner+Clinew));
1740
    fprintf(stderr,"Bitline -- Line -- access draincap     == %g\n",1e12*rowsb*Cbitrowr);
1741
    fprintf(stderr,"Bitline -- Line -- precharge draincap  == %g\n",1e12*Cprerow);
1742
    fprintf(stderr,"Bitline -- Line -- metal               == %g\n",1e12*bitlinelength*CM2metal);
1743
    fprintf(stderr,"Bitline -- Colmux                      == %g\n",1e12*Ccolmux);
1744

    
1745
    fprintf(stderr,"\n");
1746
  }
1747

    
1748

    
1749
  if(cache==0)
1750
    return(Ctotal*Powerfactor);
1751
  else
1752
    return(Ctotal*SensePowerfactor*.4);
1753
  
1754
}
1755

    
1756

    
1757
double simple_array_bitline_power(rows,cols,rports,wports,cache)
1758
     int rows,cols;
1759
     int rports,wports;
1760
     int cache;
1761
{
1762
  double bitlinelength;
1763

    
1764
  int ports = rports + wports;
1765

    
1766
  bitlinelength = rows * (RegCellHeight + ports * WordlineSpacing);
1767

    
1768
  return (array_bitline_power(rows,cols,bitlinelength,rports,wports,cache));
1769

    
1770
}
1771

    
1772
/* estimate senseamp power dissipation in cache structures (Zyuban's method) */
1773
double senseamp_power(int cols)
1774
{
1775
  return((double)cols * Vdd/8 * .5e-3);
1776
}
1777

    
1778
/* estimate comparator power consumption (this comparator is similar
1779
   to the tag-match structure in a CAM */
1780
double compare_cap(int compare_bits)
1781
{
1782
  double c1, c2;
1783
  /* bottom part of comparator */
1784
  c2 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2))+
1785
    draincap(Wevalinvp,PCH,1) + draincap(Wevalinvn,NCH,1);
1786

    
1787
  /* top part of comparator */
1788
  c1 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2)+
1789
                       draincap(Wcomppreequ,NCH,1)) +
1790
    gatecap(WdecNORn,1.0)+
1791
    gatecap(WdecNORp,3.0);
1792

    
1793
  return(c1 + c2);
1794
}
1795

    
1796
/* power of depency check logic */
1797
double dcl_compare_power(int compare_bits)
1798
{
1799
  double Ctotal;
1800
  int num_comparators;
1801
  
1802
  num_comparators = (ruu_decode_width - 1) * (ruu_decode_width);
1803

    
1804
  Ctotal = num_comparators * compare_cap(compare_bits);
1805

    
1806
  return(Ctotal*Powerfactor*AF);
1807
}
1808

    
1809
/*
 * Total power of a simple array: decoder + wordline + bitline power, plus
 * sense-amp power when the array is cache style (cache != 0).
 */
double simple_array_power(rows,cols,rports,wports,cache)
     int rows,cols;
     int rports,wports;
     int cache;
{
  double array_total =
    simple_array_decoder_power(rows,cols,rports,wports,cache)+
    simple_array_wordline_power(rows,cols,rports,wports,cache)+
    simple_array_bitline_power(rows,cols,rports,wports,cache);

  /* only cache-style arrays have sense amps */
  if (cache != 0) {
    array_total = array_total + senseamp_power(cols);
  }

  return array_total;
}
1824

    
1825

    
1826
double cam_tagdrive(rows,cols,rports,wports)
1827
     int rows,cols,rports,wports;
1828
{
1829
  double Ctotal, Ctlcap, Cblcap, Cwlcap;
1830
  double taglinelength;
1831
  double wordlinelength;
1832
  double nsize, psize;
1833
  int ports;
1834
  Ctotal=0;
1835

    
1836
  ports = rports + wports;
1837

    
1838
  taglinelength = rows * 
1839
    (CamCellHeight + ports * MatchlineSpacing);
1840

    
1841
  wordlinelength = cols * 
1842
    (CamCellWidth + ports * TaglineSpacing);
1843

    
1844
  /* Compute tagline cap */
1845
  Ctlcap = Cmetal * taglinelength + 
1846
    rows * gatecappass(Wcomparen2,2.0) +
1847
    draincap(Wcompdrivern,NCH,1)+draincap(Wcompdriverp,PCH,1);
1848

    
1849
  /* Compute bitline cap (for writing new tags) */
1850
  Cblcap = Cmetal * taglinelength +
1851
    rows * draincap(Wmemcellr,NCH,2);
1852

    
1853
  /* autosize wordline driver */
1854
  psize = driver_size(Cmetal * wordlinelength + 2 * cols * gatecap(Wmemcellr,2.0),Period/8);
1855
  nsize = psize * Wdecinvn/Wdecinvp; 
1856

    
1857
  /* Compute wordline cap (for writing new tags) */
1858
  Cwlcap = Cmetal * wordlinelength + 
1859
    draincap(nsize,NCH,1)+draincap(psize,PCH,1) +
1860
    2 * cols * gatecap(Wmemcellr,2.0);
1861
    
1862
  Ctotal += (rports * cols * 2 * Ctlcap) + 
1863
    (wports * ((cols * 2 * Cblcap) + (rows * Cwlcap)));
1864

    
1865
  return(Ctotal*Powerfactor*AF);
1866
}
1867

    
1868
double cam_tagmatch(rows,cols,rports,wports)
1869
     int rows,cols,rports,wports;
1870
{
1871
  double Ctotal, Cmlcap;
1872
  double matchlinelength;
1873
  int ports;
1874
  Ctotal=0;
1875

    
1876
  ports = rports + wports;
1877

    
1878
  matchlinelength = cols * 
1879
    (CamCellWidth + ports * TaglineSpacing);
1880

    
1881
  Cmlcap = 2 * cols * draincap(Wcomparen1,NCH,2) + 
1882
    Cmetal * matchlinelength + draincap(Wmatchpchg,NCH,1) +
1883
    gatecap(Wmatchinvn+Wmatchinvp,10.0) +
1884
    gatecap(Wmatchnandn+Wmatchnandp,10.0);
1885

    
1886
  Ctotal += rports * rows * Cmlcap;
1887

    
1888
  global_clockcap += rports * rows * gatecap(Wmatchpchg,5.0);
1889
  
1890
  /* noring the nanded match lines */
1891
  if(ruu_issue_width >= 8)
1892
    Ctotal += 2 * gatecap(Wmatchnorn+Wmatchnorp,10.0);
1893

    
1894
  return(Ctotal*Powerfactor*AF);
1895
}
1896

    
1897
/* Total CAM power: the sum of cam_tagdrive() and cam_tagmatch() for the
   same geometry and port counts. */
double cam_array(rows,cols,rports,wports)
     int rows,cols,rports,wports;
{
  return cam_tagdrive(rows,cols,rports,wports) +
    cam_tagmatch(rows,cols,rports,wports);
}
1903

    
1904

    
1905
double selection_power(int win_entries)
1906
{
1907
  double Ctotal, Cor, Cpencode;
1908
  int num_arbiter=1;
1909

    
1910
  Ctotal=0;
1911

    
1912
  while(win_entries > 4)
1913
    {
1914
      win_entries = (int)ceil((double)win_entries / 4.0);
1915
      num_arbiter += win_entries;
1916
    }
1917

    
1918
  Cor = 4 * draincap(WSelORn,NCH,1) + draincap(WSelORprequ,PCH,1);
1919

    
1920
  Cpencode = draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,1) + 
1921
    2*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,2) + 
1922
    3*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,3) + 
1923
    4*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,4) + 
1924
    4*gatecap(WSelEnn+WSelEnp,20.0) + 
1925
    4*draincap(WSelEnn,NCH,1) + 4*draincap(WSelEnp,PCH,1);
1926

    
1927
  Ctotal += ruu_issue_width * num_arbiter*(Cor+Cpencode);
1928

    
1929
  return(Ctotal*Powerfactor*AF);
1930
}
1931

    
1932
/* very rough clock power estimates */
1933
double total_clockpower(double die_length)
1934
{
1935

    
1936
  double clocklinelength;
1937
  double Cline,Cline2,Ctotal;
1938
  double pipereg_clockcap=0;
1939
  double global_buffercap = 0;
1940
  double Clockpower;
1941

    
1942
  double num_piperegs;
1943

    
1944
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
1945

    
1946
  /* Assume say 8 stages (kinda low now).
1947
     FIXME: this could be a lot better; user could input
1948
     number of pipestages, etc  */
1949

    
1950
  /* assume 8 pipe stages and try to estimate bits per pipe stage */
1951
  /* pipe stage 0/1 */
1952
  num_piperegs = ruu_issue_width*inst_length + data_width;
1953
  /* pipe stage 1/2 */
1954
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1955
  /* pipe stage 2/3 */
1956
  num_piperegs += ruu_issue_width*(inst_length + 3 * RUU_size);
1957
  /* pipe stage 3/4 */
1958
  num_piperegs += ruu_issue_width*(3 * npreg_width + pow2(opcode_length));
1959
  /* pipe stage 4/5 */
1960
  num_piperegs += ruu_issue_width*(2*data_width + pow2(opcode_length));
1961
  /* pipe stage 5/6 */
1962
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1963
  /* pipe stage 6/7 */
1964
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1965
  /* pipe stage 7/8 */
1966
  num_piperegs += ruu_issue_width*(data_width + pow2(opcode_length));
1967

    
1968
  /* assume 50% extra in control signals (rule of thumb) */
1969
  num_piperegs = num_piperegs * 1.5;
1970

    
1971
  pipereg_clockcap = num_piperegs * 4*gatecap(10.0,0);
1972

    
1973
  /* estimate based on 3% of die being in clock metal */
1974
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
1975

    
1976
  /* another estimate */
1977
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
1978
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
1979
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
1980
  /* global_clockcap is computed within each array structure for pre-charge tx's*/
1981
  Ctotal = Cline+global_clockcap+pipereg_clockcap+global_buffercap;
1982

    
1983
  if(verbose)
1984
    fprintf(stderr,"num_piperegs == %f\n",num_piperegs);
1985

    
1986
  /* add I_ADD Clockcap and F_ADD Clockcap */
1987
  Clockpower = Ctotal*Powerfactor + res_ialu*I_ADD_CLOCK + res_fpalu*F_ADD_CLOCK;
1988

    
1989
  if(verbose) {
1990
    fprintf(stderr,"Global Clock Power: %g\n",Clockpower);
1991
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
1992
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
1993
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
1994
    fprintf(stderr," Global Clock Cap (Explicit) (W): %g\n",global_clockcap*Powerfactor+I_ADD_CLOCK+F_ADD_CLOCK);
1995
    fprintf(stderr," Global Clock Cap (Implicit) (W): %g\n",pipereg_clockcap*Powerfactor);
1996
  }
1997
  return(Clockpower);
1998

    
1999
}
2000

    
2001
/* very rough global clock power estimates */
2002
double global_clockpower(double die_length)
2003
{
2004

    
2005
  double clocklinelength;
2006
  double Cline,Cline2,Ctotal;
2007
  double global_buffercap = 0;
2008

    
2009
  Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
2010

    
2011
  clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
2012
  Cline = 20 * Cmetal * (clocklinelength) * 1e6;
2013
  global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
2014
  Ctotal = Cline+global_buffercap;
2015

    
2016
  if(verbose) {
2017
    fprintf(stderr,"Global Clock Power: %g\n",Ctotal*Powerfactor);
2018
    fprintf(stderr," Global Metal Lines   (W): %g\n",Cline*Powerfactor);
2019
    fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
2020
    fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
2021
  }
2022

    
2023
  return(Ctotal*Powerfactor);
2024

    
2025
}
2026

    
2027

    
2028
double compute_resultbus_power()
2029
{
2030
  double Ctotal, Cline;
2031

    
2032
  double regfile_height;
2033

    
2034
  /* compute size of result bus tags */
2035
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2036

    
2037
  Ctotal=0;
2038

    
2039
  regfile_height = RUU_size * (RegCellHeight + 
2040
                               WordlineSpacing * 3 * ruu_issue_width); 
2041

    
2042
  /* assume num alu's == ialu  (FIXME: generate a more detailed result bus network model*/
2043
  Cline = Cmetal * (regfile_height + .5 * res_ialu * 3200.0 * LSCALE);
2044

    
2045
  /* or use result bus length measured from 21264 die photo */
2046
  /*  Cline = Cmetal * 3.3*1000;*/
2047

    
2048
  /* Assume ruu_issue_width result busses -- power can be scaled linearly
2049
     for number of result busses (scale by writeback_access) */
2050
  Ctotal += 2.0 * (data_width + npreg_width) * (ruu_issue_width)* Cline;
2051

    
2052
#ifdef STATIC_AF
2053
  return(Ctotal*Powerfactor*AF);
2054
#else
2055
  return(Ctotal*Powerfactor);
2056
#endif
2057
  
2058
}
2059

    
2060
void calculate_power(power)
2061
     power_result_type *power;
2062
{
2063
  double clockpower;
2064
  double predeclength, wordlinelength, bitlinelength;
2065
  int ndwl, ndbl, nspd, ntwl, ntbl, ntspd, c,b,a,cache, rowsb, colsb;
2066
  int trowsb, tcolsb, tagsize;
2067
  int va_size = 48;
2068

    
2069
  int npreg_width = (int)ceil(logtwo((double)RUU_size));
2070

    
2071
  /* these variables are needed to use Cacti to auto-size cache arrays 
2072
     (for optimal delay) */
2073
  time_result_type time_result;
2074
  time_parameter_type time_parameters;
2075

    
2076
  /* used to autosize other structures, like bpred tables */
2077
  int scale_factor;
2078

    
2079
  global_clockcap = 0;
2080

    
2081
  cache=0;
2082

    
2083

    
2084
  /* FIXME: ALU power is a simple constant, it would be better
2085
     to include bit AFs and have different numbers for different
2086
     types of operations */
2087
  power->ialu_power = res_ialu * I_ADD;
2088
  power->falu_power = res_fpalu * F_ADD;
2089

    
2090
  nvreg_width = (int)ceil(logtwo((double)MD_NUM_IREGS));
2091
  npreg_width = (int)ceil(logtwo((double)RUU_size));
2092

    
2093

    
2094
  /* RAT has shadow bits stored in each cell, this makes the
2095
     cell size larger than normal array structures, so we must
2096
     compute it here */
2097

    
2098
  predeclength = MD_NUM_IREGS * 
2099
    (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2100

    
2101
  wordlinelength = npreg_width * 
2102
    (RatCellWidth + 
2103
     6 * ruu_decode_width * BitlineSpacing + 
2104
     RatShiftRegWidth*RatNumShift);
2105

    
2106
  bitlinelength = MD_NUM_IREGS * (RatCellHeight + 3 * ruu_decode_width * WordlineSpacing);
2107

    
2108
  if(verbose)
2109
    fprintf(stderr,"rat power stats\n");
2110
  power->rat_decoder = array_decoder_power(MD_NUM_IREGS,npreg_width,predeclength,2*ruu_decode_width,ruu_decode_width,cache);
2111
  power->rat_wordline = array_wordline_power(MD_NUM_IREGS,npreg_width,wordlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2112
  power->rat_bitline = array_bitline_power(MD_NUM_IREGS,npreg_width,bitlinelength,2*ruu_decode_width,ruu_decode_width,cache);
2113
  power->rat_senseamp = 0;
2114

    
2115
  power->dcl_compare = dcl_compare_power(nvreg_width);
2116
  power->dcl_pencode = 0;
2117
  power->inst_decoder_power = ruu_decode_width * simple_array_decoder_power(opcode_length,1,1,1,cache);
2118
  power->wakeup_tagdrive =cam_tagdrive(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2119
  power->wakeup_tagmatch =cam_tagmatch(RUU_size,npreg_width,ruu_issue_width,ruu_issue_width);
2120
  power->wakeup_ormatch =0; 
2121

    
2122
  power->selection = selection_power(RUU_size);
2123

    
2124

    
2125
  predeclength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2126

    
2127
  wordlinelength = data_width * 
2128
    (RegCellWidth + 
2129
     6 * ruu_issue_width * BitlineSpacing);
2130

    
2131
  bitlinelength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2132

    
2133
  if(verbose)
2134
    fprintf(stderr,"regfile power stats\n");
2135

    
2136
  power->regfile_decoder = array_decoder_power(MD_NUM_IREGS,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2137
  power->regfile_wordline = array_wordline_power(MD_NUM_IREGS,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2138
  power->regfile_bitline = array_bitline_power(MD_NUM_IREGS,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2139
  power->regfile_senseamp =0;
2140

    
2141
  predeclength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2142

    
2143
  wordlinelength = data_width * 
2144
    (RegCellWidth + 
2145
     6 * ruu_issue_width * BitlineSpacing);
2146

    
2147
  bitlinelength = RUU_size * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
2148

    
2149
  if(verbose)
2150
    fprintf(stderr,"res station power stats\n");
2151
  power->rs_decoder = array_decoder_power(RUU_size,data_width,predeclength,2*ruu_issue_width,ruu_issue_width,cache);
2152
  power->rs_wordline = array_wordline_power(RUU_size,data_width,wordlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2153
  power->rs_bitline = array_bitline_power(RUU_size,data_width,bitlinelength,2*ruu_issue_width,ruu_issue_width,cache);
2154
  /* no senseamps in reg file structures (only caches) */
2155
  power->rs_senseamp =0;
2156

    
2157
  /* addresses go into lsq tag's */
2158
  power->lsq_wakeup_tagdrive =cam_tagdrive(LSQ_size,data_width,res_memport,res_memport);
2159
  power->lsq_wakeup_tagmatch =cam_tagmatch(LSQ_size,data_width,res_memport,res_memport);
2160
  power->lsq_wakeup_ormatch =0; 
2161

    
2162
  wordlinelength = data_width * 
2163
    (RegCellWidth + 
2164
     4 * res_memport * BitlineSpacing);
2165

    
2166
  bitlinelength = RUU_size * (RegCellHeight + 4 * res_memport * WordlineSpacing);
2167

    
2168
  /* rs's hold data */
2169
  if(verbose)
2170
    fprintf(stderr,"lsq station power stats\n");
2171
  power->lsq_rs_decoder = array_decoder_power(LSQ_size,data_width,predeclength,res_memport,res_memport,cache);
2172
  power->lsq_rs_wordline = array_wordline_power(LSQ_size,data_width,wordlinelength,res_memport,res_memport,cache);
2173
  power->lsq_rs_bitline = array_bitline_power(LSQ_size,data_width,bitlinelength,res_memport,res_memport,cache);
2174
  power->lsq_rs_senseamp =0;
2175

    
2176
  power->resultbus = compute_resultbus_power();
2177

    
2178
  /* Load cache values into what cacti is expecting */
2179
  time_parameters.cache_size = btb_config[0] * (data_width/8) * btb_config[1]; /* C */
2180
  time_parameters.block_size = (data_width/8); /* B */
2181
  time_parameters.associativity = btb_config[1]; /* A */
2182
  time_parameters.number_of_sets = btb_config[0]; /* C/(B*A) */
2183

    
2184
  /* have Cacti compute optimal cache config */
2185
  calculate_time(&time_result,&time_parameters);
2186
  output_data(&time_result,&time_parameters);
2187

    
2188
  /* extract Cacti results */
2189
  ndwl=time_result.best_Ndwl;
2190
  ndbl=time_result.best_Ndbl;
2191
  nspd=time_result.best_Nspd;
2192
  ntwl=time_result.best_Ntwl;
2193
  ntbl=time_result.best_Ntbl;
2194
  ntspd=time_result.best_Ntspd;
2195
  c = time_parameters.cache_size;
2196
  b = time_parameters.block_size;
2197
  a = time_parameters.associativity; 
2198

    
2199
  cache=1;
2200

    
2201
  /* Figure out how many rows/cols there are now */
2202
  rowsb = c/(b*a*ndbl*nspd);
2203
  colsb = 8*b*a*nspd/ndwl;
2204

    
2205
  if(verbose) {
2206
    fprintf(stderr,"%d KB %d-way btb (%d-byte block size):\n",c,a,b);
2207
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2208
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2209
  }
2210

    
2211
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2212
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2213
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2214

    
2215
  if(verbose)
2216
    fprintf(stderr,"btb power stats\n");
2217
  power->btb = ndwl*ndbl*(array_decoder_power(rowsb,colsb,predeclength,1,1,cache) + array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache) + array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache) + senseamp_power(colsb));
2218

    
2219
  cache=1;
2220

    
2221
  scale_factor = squarify(twolev_config[0],twolev_config[2]);
2222
  predeclength = (twolev_config[0] / scale_factor)* (RegCellHeight + WordlineSpacing);
2223
  wordlinelength = twolev_config[2] * scale_factor *  (RegCellWidth + BitlineSpacing);
2224
  bitlinelength = (twolev_config[0] / scale_factor) * (RegCellHeight + WordlineSpacing);
2225

    
2226
  if(verbose)
2227
    fprintf(stderr,"local predict power stats\n");
2228

    
2229
  power->local_predict = array_decoder_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,bitlinelength,1,1,cache) + senseamp_power(twolev_config[2]*scale_factor);
2230

    
2231
  scale_factor = squarify(twolev_config[1],3);
2232

    
2233
  predeclength = (twolev_config[1] / scale_factor)* (RegCellHeight + WordlineSpacing);
2234
  wordlinelength = 3 * scale_factor *  (RegCellWidth + BitlineSpacing);
2235
  bitlinelength = (twolev_config[1] / scale_factor) * (RegCellHeight + WordlineSpacing);
2236

    
2237

    
2238
  if(verbose)
2239
    fprintf(stderr,"local predict power stats\n");
2240
  power->local_predict += array_decoder_power(twolev_config[1]/scale_factor,3*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[1]/scale_factor,3*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[1]/scale_factor,3*scale_factor,bitlinelength,1,1,cache) + senseamp_power(3*scale_factor);
2241

    
2242
  if(verbose)
2243
    fprintf(stderr,"bimod_config[0] == %d\n",bimod_config[0]);
2244

    
2245
  scale_factor = squarify(bimod_config[0],2);
2246

    
2247
  predeclength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2248
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2249
  bitlinelength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2250

    
2251

    
2252
  if(verbose)
2253
    fprintf(stderr,"global predict power stats\n");
2254
  power->global_predict = array_decoder_power(bimod_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(bimod_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(bimod_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2255

    
2256
  scale_factor = squarify(comb_config[0],2);
2257

    
2258
  predeclength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2259
  wordlinelength = 2*scale_factor *  (RegCellWidth + BitlineSpacing);
2260
  bitlinelength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
2261

    
2262
  if(verbose)
2263
    fprintf(stderr,"chooser predict power stats\n");
2264
  power->chooser = array_decoder_power(comb_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(comb_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(comb_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
2265

    
2266
  if(verbose)
2267
    fprintf(stderr,"RAS predict power stats\n");
2268
  power->ras = simple_array_power(ras_size,data_width,1,1,0);
2269

    
2270
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2271

    
2272
  if(verbose)
2273
    fprintf(stderr,"dtlb predict power stats\n");
2274
  power->dtlb = res_memport*(cam_array(dtlb->nsets, va_size - (int)logtwo((double)dtlb->bsize),1,1) + simple_array_power(dtlb->nsets,tagsize,1,1,cache));
2275

    
2276
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2277

    
2278
  predeclength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2279
  wordlinelength = logtwo((double)itlb->bsize) * (RegCellWidth + BitlineSpacing);
2280
  bitlinelength = itlb->nsets * (RegCellHeight + WordlineSpacing);
2281

    
2282
  if(verbose)
2283
    fprintf(stderr,"itlb predict power stats\n");
2284
  power->itlb = cam_array(itlb->nsets, va_size - (int)logtwo((double)itlb->bsize),1,1) + simple_array_power(itlb->nsets,tagsize,1,1,cache);
2285

    
2286

    
2287
  cache=1;
2288

    
2289
  time_parameters.cache_size = cache_il1->nsets * cache_il1->bsize * cache_il1->assoc; /* C */
2290
  time_parameters.block_size = cache_il1->bsize; /* B */
2291
  time_parameters.associativity = cache_il1->assoc; /* A */
2292
  time_parameters.number_of_sets = cache_il1->nsets; /* C/(B*A) */
2293

    
2294
  calculate_time(&time_result,&time_parameters);
2295
  output_data(&time_result,&time_parameters);
2296

    
2297
  ndwl=time_result.best_Ndwl;
2298
  ndbl=time_result.best_Ndbl;
2299
  nspd=time_result.best_Nspd;
2300
  ntwl=time_result.best_Ntwl;
2301
  ntbl=time_result.best_Ntbl;
2302
  ntspd=time_result.best_Ntspd;
2303

    
2304
  c = time_parameters.cache_size;
2305
  b = time_parameters.block_size;
2306
  a = time_parameters.associativity;
2307

    
2308
  rowsb = c/(b*a*ndbl*nspd);
2309
  colsb = 8*b*a*nspd/ndwl;
2310

    
2311
  tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
2312
  trowsb = c/(b*a*ntbl*ntspd);
2313
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2314
 
2315
  if(verbose) {
2316
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2317
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2318
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2319
    fprintf(stderr,"tagsize == %d\n",tagsize);
2320
  }
2321

    
2322
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2323
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2324
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2325

    
2326
  if(verbose)
2327
    fprintf(stderr,"icache power stats\n");
2328
  power->icache_decoder = ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2329
  power->icache_wordline = ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2330
  power->icache_bitline = ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2331
  power->icache_senseamp = ndwl*ndbl*senseamp_power(colsb);
2332
  power->icache_tagarray = ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2333

    
2334
  power->icache_power = power->icache_decoder + power->icache_wordline + power->icache_bitline + power->icache_senseamp + power->icache_tagarray;
2335

    
2336
  time_parameters.cache_size = cache_dl1->nsets * cache_dl1->bsize * cache_dl1->assoc; /* C */
2337
  time_parameters.block_size = cache_dl1->bsize; /* B */
2338
  time_parameters.associativity = cache_dl1->assoc; /* A */
2339
  time_parameters.number_of_sets = cache_dl1->nsets; /* C/(B*A) */
2340

    
2341
  calculate_time(&time_result,&time_parameters);
2342
  output_data(&time_result,&time_parameters);
2343

    
2344
  ndwl=time_result.best_Ndwl;
2345
  ndbl=time_result.best_Ndbl;
2346
  nspd=time_result.best_Nspd;
2347
  ntwl=time_result.best_Ntwl;
2348
  ntbl=time_result.best_Ntbl;
2349
  ntspd=time_result.best_Ntspd;
2350
  c = time_parameters.cache_size;
2351
  b = time_parameters.block_size;
2352
  a = time_parameters.associativity; 
2353

    
2354
  cache=1;
2355

    
2356
  rowsb = c/(b*a*ndbl*nspd);
2357
  colsb = 8*b*a*nspd/ndwl;
2358

    
2359
  tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
2360
  trowsb = c/(b*a*ntbl*ntspd);
2361
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2362

    
2363
  if(verbose) {
2364
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2365
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2366
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2367
    fprintf(stderr,"tagsize == %d\n",tagsize);
2368

    
2369
    fprintf(stderr,"\nntwl == %d, ntbl == %d, ntspd == %d\n",ntwl,ntbl,ntspd);
2370
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ntwl*ntbl,trowsb,tcolsb);
2371
  }
2372

    
2373
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2374
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2375
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2376

    
2377
  if(verbose)
2378
    fprintf(stderr,"dcache power stats\n");
2379
  power->dcache_decoder = res_memport*ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2380
  power->dcache_wordline = res_memport*ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2381
  power->dcache_bitline = res_memport*ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2382
  power->dcache_senseamp = res_memport*ndwl*ndbl*senseamp_power(colsb);
2383
  power->dcache_tagarray = res_memport*ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
2384

    
2385
  power->dcache_power = power->dcache_decoder + power->dcache_wordline + power->dcache_bitline + power->dcache_senseamp + power->dcache_tagarray;
2386

    
2387
  clockpower = total_clockpower(.018);
2388
  power->clock_power = clockpower;
2389
  if(verbose) {
2390
    fprintf(stderr,"result bus power == %f\n",power->resultbus);
2391
    fprintf(stderr,"global clock power == %f\n",clockpower);
2392
  }
2393

    
2394
  time_parameters.cache_size = cache_dl2->nsets * cache_dl2->bsize * cache_dl2->assoc; /* C */
2395
  time_parameters.block_size = cache_dl2->bsize; /* B */
2396
  time_parameters.associativity = cache_dl2->assoc; /* A */
2397
  time_parameters.number_of_sets = cache_dl2->nsets; /* C/(B*A) */
2398

    
2399
  calculate_time(&time_result,&time_parameters);
2400
  output_data(&time_result,&time_parameters);
2401

    
2402
  ndwl=time_result.best_Ndwl;
2403
  ndbl=time_result.best_Ndbl;
2404
  nspd=time_result.best_Nspd;
2405
  ntwl=time_result.best_Ntwl;
2406
  ntbl=time_result.best_Ntbl;
2407
  ntspd=time_result.best_Ntspd;
2408
  c = time_parameters.cache_size;
2409
  b = time_parameters.block_size;
2410
  a = time_parameters.associativity;
2411

    
2412
  rowsb = c/(b*a*ndbl*nspd);
2413
  colsb = 8*b*a*nspd/ndwl;
2414

    
2415
  tagsize = va_size - ((int)logtwo(cache_dl2->nsets) + (int)logtwo(cache_dl2->bsize));
2416
  trowsb = c/(b*a*ntbl*ntspd);
2417
  tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
2418

    
2419
  if(verbose) {
2420
    fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
2421
    fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
2422
    fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
2423
    fprintf(stderr,"tagsize == %d\n",tagsize);
2424
  }
2425

    
2426
  predeclength = rowsb * (RegCellHeight + WordlineSpacing);
2427
  wordlinelength = colsb *  (RegCellWidth + BitlineSpacing);
2428
  bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
2429

    
2430
  if(verbose)
2431
    fprintf(stderr,"dcache2 power stats\n");
2432
  power->dcache2_decoder = array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
2433
  power->dcache2_wordline = array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
2434
  power->dcache2_bitline = array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
2435
  power->dcache2_senseamp = senseamp_power(colsb);
2436
  power->dcache2_tagarray = simple_array_power(trowsb,tcolsb,1,1,cache);
2437

    
2438
  power->dcache2_power = power->dcache2_decoder + power->dcache2_wordline + power->dcache2_bitline + power->dcache2_senseamp + power->dcache2_tagarray;
2439

    
2440
  power->rat_decoder *= crossover_scaling;
2441
  power->rat_wordline *= crossover_scaling;
2442
  power->rat_bitline *= crossover_scaling;
2443

    
2444
  power->dcl_compare *= crossover_scaling;
2445
  power->dcl_pencode *= crossover_scaling;
2446
  power->inst_decoder_power *= crossover_scaling;
2447
  power->wakeup_tagdrive *= crossover_scaling;
2448
  power->wakeup_tagmatch *= crossover_scaling;
2449
  power->wakeup_ormatch *= crossover_scaling;
2450

    
2451
  power->selection *= crossover_scaling;
2452

    
2453
  power->regfile_decoder *= crossover_scaling;
2454
  power->regfile_wordline *= crossover_scaling;
2455
  power->regfile_bitline *= crossover_scaling;
2456
  power->regfile_senseamp *= crossover_scaling;
2457

    
2458
  power->rs_decoder *= crossover_scaling;
2459
  power->rs_wordline *= crossover_scaling;
2460
  power->rs_bitline *= crossover_scaling;
2461
  power->rs_senseamp *= crossover_scaling;
2462

    
2463
  power->lsq_wakeup_tagdrive *= crossover_scaling;
2464
  power->lsq_wakeup_tagmatch *= crossover_scaling;
2465

    
2466
  power->lsq_rs_decoder *= crossover_scaling;
2467
  power->lsq_rs_wordline *= crossover_scaling;
2468
  power->lsq_rs_bitline *= crossover_scaling;
2469
  power->lsq_rs_senseamp *= crossover_scaling;
2470
 
2471
  power->resultbus *= crossover_scaling;
2472

    
2473
  power->btb *= crossover_scaling;
2474
  power->local_predict *= crossover_scaling;
2475
  power->global_predict *= crossover_scaling;
2476
  power->chooser *= crossover_scaling;
2477

    
2478
  power->dtlb *= crossover_scaling;
2479

    
2480
  power->itlb *= crossover_scaling;
2481

    
2482
  power->icache_decoder *= crossover_scaling;
2483
  power->icache_wordline*= crossover_scaling;
2484
  power->icache_bitline *= crossover_scaling;
2485
  power->icache_senseamp*= crossover_scaling;
2486
  power->icache_tagarray*= crossover_scaling;
2487

    
2488
  power->icache_power *= crossover_scaling;
2489

    
2490
  power->dcache_decoder *= crossover_scaling;
2491
  power->dcache_wordline *= crossover_scaling;
2492
  power->dcache_bitline *= crossover_scaling;
2493
  power->dcache_senseamp *= crossover_scaling;
2494
  power->dcache_tagarray *= crossover_scaling;
2495

    
2496
  power->dcache_power *= crossover_scaling;
2497
  
2498
  power->clock_power *= crossover_scaling;
2499

    
2500
  power->dcache2_decoder *= crossover_scaling;
2501
  power->dcache2_wordline *= crossover_scaling;
2502
  power->dcache2_bitline *= crossover_scaling;
2503
  power->dcache2_senseamp *= crossover_scaling;
2504
  power->dcache2_tagarray *= crossover_scaling;
2505

    
2506
  power->dcache2_power *= crossover_scaling;
2507

    
2508
  power->total_power = power->local_predict + power->global_predict + 
2509
    power->chooser + power->btb +
2510
    power->rat_decoder + power->rat_wordline + 
2511
    power->rat_bitline + power->rat_senseamp + 
2512
    power->dcl_compare + power->dcl_pencode + 
2513
    power->inst_decoder_power +
2514
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2515
    power->selection +
2516
    power->regfile_decoder + power->regfile_wordline + 
2517
    power->regfile_bitline + power->regfile_senseamp +  
2518
    power->rs_decoder + power->rs_wordline +
2519
    power->rs_bitline + power->rs_senseamp + 
2520
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2521
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2522
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2523
    power->resultbus +
2524
    power->clock_power +
2525
    power->icache_power + 
2526
    power->itlb + 
2527
    power->dcache_power + 
2528
    power->dtlb + 
2529
    power->dcache2_power;
2530

    
2531
  power->total_power_nodcache2 =power->local_predict + power->global_predict + 
2532
    power->chooser + power->btb +
2533
    power->rat_decoder + power->rat_wordline + 
2534
    power->rat_bitline + power->rat_senseamp + 
2535
    power->dcl_compare + power->dcl_pencode + 
2536
    power->inst_decoder_power +
2537
    power->wakeup_tagdrive + power->wakeup_tagmatch + 
2538
    power->selection +
2539
    power->regfile_decoder + power->regfile_wordline + 
2540
    power->regfile_bitline + power->regfile_senseamp +  
2541
    power->rs_decoder + power->rs_wordline +
2542
    power->rs_bitline + power->rs_senseamp + 
2543
    power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
2544
    power->lsq_rs_decoder + power->lsq_rs_wordline +
2545
    power->lsq_rs_bitline + power->lsq_rs_senseamp +
2546
    power->resultbus +
2547
    power->clock_power +
2548
    power->icache_power + 
2549
    power->itlb + 
2550
    power->dcache_power + 
2551
    power->dtlb + 
2552
    power->dcache2_power;
2553

    
2554
  power->bpred_power = power->btb + power->local_predict + power->global_predict + power->chooser + power->ras;
2555

    
2556
  power->rat_power = power->rat_decoder + 
2557
    power->rat_wordline + power->rat_bitline + power->rat_senseamp;
2558

    
2559
  power->dcl_power = power->dcl_compare + power->dcl_pencode;
2560

    
2561
  power->rename_power = power->rat_power + 
2562
    power->dcl_power + 
2563
    power->inst_decoder_power;
2564

    
2565
  power->wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch + 
2566
    power->wakeup_ormatch;
2567

    
2568
  power->rs_power = power->rs_decoder + 
2569
    power->rs_wordline + power->rs_bitline + power->rs_senseamp;
2570

    
2571
  power->rs_power_nobit = power->rs_decoder + 
2572
    power->rs_wordline + power->rs_senseamp;
2573

    
2574
  power->window_power = power->wakeup_power + power->rs_power + 
2575
    power->selection;
2576

    
2577
  power->lsq_rs_power = power->lsq_rs_decoder + 
2578
    power->lsq_rs_wordline + power->lsq_rs_bitline + 
2579
    power->lsq_rs_senseamp;
2580

    
2581
  power->lsq_rs_power_nobit = power->lsq_rs_decoder + 
2582
    power->lsq_rs_wordline + power->lsq_rs_senseamp;
2583
   
2584
  power->lsq_wakeup_power = power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch;
2585

    
2586
  power->lsq_power = power->lsq_wakeup_power + power->lsq_rs_power;
2587

    
2588
  power->regfile_power = power->regfile_decoder + 
2589
    power->regfile_wordline + power->regfile_bitline + 
2590
    power->regfile_senseamp;
2591

    
2592
  power->regfile_power_nobit = power->regfile_decoder + 
2593
    power->regfile_wordline + power->regfile_senseamp;
2594

    
2595
  dump_power_stats(power);
2596

    
2597
}